diff --git a/.github/workflows/impl-generate.yml b/.github/workflows/impl-generate.yml
index 478de6eb99..7635825740 100644
--- a/.github/workflows/impl-generate.yml
+++ b/.github/workflows/impl-generate.yml
@@ -271,6 +271,13 @@ jobs:
             1. Read `plots/${{ steps.inputs.outputs.specification_id }}/metadata/${{ steps.inputs.outputs.library }}.yaml`
                - Look at `review.strengths` (keep these aspects!)
                - Look at `review.weaknesses` (fix these problems - decide HOW yourself)
+               - Look at `review.image_description` (understand what was generated visually)
+               - Look at `review.criteria_checklist` (see exactly which criteria failed)
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - Check items with `passed: false` - these need fixing
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
             2. Read `plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py`
                - Understand what was done before
                - Keep what worked, fix what didn't
@@ -346,6 +353,13 @@ jobs:
             1. Read `plots/${{ steps.inputs.outputs.specification_id }}/metadata/${{ steps.inputs.outputs.library }}.yaml`
                - Look at `review.strengths` (keep these aspects!)
                - Look at `review.weaknesses` (fix these problems - decide HOW yourself)
+               - Look at `review.image_description` (understand what was generated visually)
+               - Look at `review.criteria_checklist` (see exactly which criteria failed)
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - Check items with `passed: false` - these need fixing
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
             2. Read `plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py`
                - Understand what was done before
                - Keep what worked, fix what didn't
diff --git a/.github/workflows/impl-repair.yml b/.github/workflows/impl-repair.yml
index b03c87b4cd..1bc279eb88 100644
--- a/.github/workflows/impl-repair.yml
+++ b/.github/workflows/impl-repair.yml
@@ -127,6 +127,13 @@ jobs:
             2. `plots/${{ inputs.specification_id }}/metadata/${{ inputs.library }}.yaml` - Look at:
                - `review.strengths` (keep these aspects!)
                - `review.weaknesses` (fix these problems - decide HOW yourself)
+               - `review.image_description` (understand what was generated visually)
+               - `review.criteria_checklist` (see exactly which criteria failed)
+                 - Look for items with `passed: false` - these need fixing
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
 
             ### Step 2: Read reference files
             1. `prompts/library/${{ inputs.library }}.md` - Library-specific rules
@@ -192,6 +199,13 @@ jobs:
             2. `plots/${{ inputs.specification_id }}/metadata/${{ inputs.library }}.yaml` - Look at:
                - `review.strengths` (keep these aspects!)
                - `review.weaknesses` (fix these problems - decide HOW yourself)
+               - `review.image_description` (understand what was generated visually)
+               - `review.criteria_checklist` (see exactly which criteria failed)
+                 - Look for items with `passed: false` - these need fixing
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
 
             ### Step 2: Read reference files
             1. `prompts/library/${{ inputs.library }}.md` - Library-specific rules
diff --git a/.github/workflows/impl-review.yml b/.github/workflows/impl-review.yml
index 33f077d428..142e328cc4 100644
--- a/.github/workflows/impl-review.yml
+++ b/.github/workflows/impl-review.yml
@@ -206,12 +206,39 @@ jobs:
                # Save structured feedback as JSON (one array per file)
                echo '["Strength 1", "Strength 2"]' > review_strengths.json
                echo '["Weakness 1"]' > review_weaknesses.json
+
+               # Save verdict
+               echo "APPROVED" > review_verdict.txt  # or "REJECTED"
+
+               # Save image description (multi-line text)
+               cat > review_image_description.txt << 'EOF'
+               The plot shows a scatter plot with blue markers...
+               [Your full image description here]
+               EOF
+
+               # Save criteria checklist as structured JSON (must be valid JSON: replace every [...] placeholder with the real item list)
+               cat > review_checklist.json << 'EOF'
+               {
+                 "visual_quality": {
+                   "score": 36,
+                   "max": 40,
+                   "items": [
+                     {"id": "VQ-01", "name": "Text Legibility", "score": 10, "max": 10, "passed": true, "comment": "All text readable"},
+                     {"id": "VQ-02", "name": "No Overlap", "score": 8, "max": 8, "passed": true, "comment": "No overlapping elements"}
+                   ]
+                 },
+                 "spec_compliance": {"score": 23, "max": 25, "items": [...]},
+                 "data_quality": {"score": 18, "max": 20, "items": [...]},
+                 "code_quality": {"score": 10, "max": 10, "items": [...]},
+                 "library_features": {"score": 5, "max": 5, "items": [...]}
+               }
+               EOF
                ```
 
             8. **DO NOT add ai-approved or ai-rejected labels** - the workflow will add them after updating metadata.
 
             **IMPORTANT**: Your review MUST include the "Image Description" section. A review without an image description will be considered invalid.
-            **IMPORTANT**: The Strengths/Weaknesses sections are saved to the metadata for future regeneration. Be specific!
+            **IMPORTANT**: All review data (strengths, weaknesses, image_description, criteria_checklist, verdict) is saved to metadata for future regeneration. Be specific!
 
       - name: Extract quality score
         id: score
@@ -266,21 +293,8 @@ jobs:
           git fetch origin "$BRANCH"
           git checkout -B "$BRANCH" "origin/$BRANCH"
 
-          # Read review feedback from JSON files (created by Claude)
-          STRENGTHS="[]"
-          WEAKNESSES="[]"
-
-          if [ -f "review_strengths.json" ]; then
-            STRENGTHS=$(cat review_strengths.json)
-          fi
-          if [ -f "review_weaknesses.json" ]; then
-            WEAKNESSES=$(cat review_weaknesses.json)
-          fi
-
           # Update metadata file with quality score, timestamp, and review feedback
           if [ -f "$METADATA_FILE" ]; then
-            # Update all metadata using Python for proper YAML handling
-            # Pass JSON via files to avoid shell escaping issues with quotes
             TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
 
             # Write Python script to temp file to avoid YAML/shell escaping issues
@@ -294,8 +308,12 @@ jobs:
           score = int(sys.argv[2])
           timestamp = sys.argv[3]
 
+          # Read existing review data files
           strengths = []
           weaknesses = []
+          image_description = None
+          criteria_checklist = None
+          verdict = None
 
           if Path('review_strengths.json').exists():
               try:
@@ -311,6 +329,28 @@ jobs:
               except:
                   pass
 
+          if Path('review_image_description.txt').exists():
+              try:
+                  with open('review_image_description.txt', encoding='utf-8') as f:
+                      image_description = f.read().strip()
+              except Exception as exc:  # stay best-effort, but report why the field is skipped
+                  print(f'::warning::Could not read review_image_description.txt: {exc}')
+
+          if Path('review_checklist.json').exists():
+              try:
+                  with open('review_checklist.json', encoding='utf-8') as f:
+                      criteria_checklist = json.load(f)
+              except Exception as exc:  # e.g. the reviewer wrote invalid JSON
+                  print(f'::warning::Could not parse review_checklist.json: {exc}')
+
+          if Path('review_verdict.txt').exists():
+              try:
+                  with open('review_verdict.txt', encoding='utf-8') as f:
+                      verdict = f.read().strip()
+              except Exception as exc:  # stay best-effort, but report why the field is skipped
+                  print(f'::warning::Could not read review_verdict.txt: {exc}')
+
+          # Load existing metadata
           with open(metadata_file, 'r') as f:
               data = yaml.safe_load(f)
 
@@ -320,12 +360,24 @@ jobs:
           if 'review' not in data:
               data['review'] = {}
 
+          # Update review section with all fields
           data['review']['strengths'] = strengths
           data['review']['weaknesses'] = weaknesses
 
+          # Add extended review data (issue #2845)
+          if image_description:
+              data['review']['image_description'] = image_description
+          if criteria_checklist:
+              data['review']['criteria_checklist'] = criteria_checklist
+          if verdict:
+              data['review']['verdict'] = verdict
+
           def str_representer(dumper, data):
               if isinstance(data, str) and data.endswith('Z') and 'T' in data:
                   return dumper.represent_scalar('tag:yaml.org,2002:str', data, style="'")
+              # Emit multi-line strings (e.g. image_description) in literal block style so they stay readable
+              if isinstance(data, str) and '\n' in data:
+                  return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
               return dumper.represent_scalar('tag:yaml.org,2002:str', data)
 
           yaml.add_representer(str, str_representer)
@@ -335,7 +387,7 @@ jobs:
           EOF
 
             python3 /tmp/update_metadata.py "$METADATA_FILE" "$SCORE" "$TIMESTAMP"
-            echo "::notice::Updated metadata with quality score ${SCORE} and review feedback"
+            echo "::notice::Updated metadata with quality score ${SCORE} and extended review data"
           fi
 
           # Update implementation header with quality score
diff --git a/CLAUDE.md b/CLAUDE.md
index 962d5d2402..ee7742c094 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -308,6 +308,34 @@ quality_score: 92
 
 # Review feedback (used for regeneration)
 review:
+  # AI's visual description of the generated plot
+  image_description: |
+    The plot shows a scatter plot with 100 data points displaying
+    a positive correlation. Points are rendered in blue with 70%
+    opacity. Axes are clearly labeled and a subtle grid is visible.
+
+  # Detailed scoring breakdown by category
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+        - id: VQ-01
+          name: Text Legibility
+          score: 10
+          max: 10
+          passed: true
+          comment: "All text readable at full size"
+    spec_compliance:
+      score: 23
+      max: 25
+      items: [...]
+    # ... data_quality, code_quality, library_features
+
+  # Final verdict
+  verdict: APPROVED
+
+  # Summary feedback
   strengths:
     - "Clean code structure"
     - "Good use of alpha for overlapping points"
@@ -329,6 +357,7 @@ Quality: 92/100 | Created: 2025-01-10
 - Spec-level tracking in `specification.yaml`: `created`, `updated`, `issue`, `suggested`, `tags`
 - Per-library metadata in separate files (no merge conflicts!)
 - **Review feedback** stored in metadata for regeneration (AI reads previous feedback to improve)
+- **Extended review data**: `image_description`, `criteria_checklist`, and `verdict` for targeted fixes
 - Contributors credited via `suggested` field
 - Tags are at spec level (same for all libraries)
 - Per-library metadata updated automatically by `impl-review.yml` (quality score, review feedback)
diff --git a/alembic/versions/6345896e2e90_add_extended_review_fields.py b/alembic/versions/6345896e2e90_add_extended_review_fields.py
new file mode 100644
index 0000000000..a705fd9ff5
--- /dev/null
+++ b/alembic/versions/6345896e2e90_add_extended_review_fields.py
@@ -0,0 +1,45 @@
+"""add_extended_review_fields
+
+Add extended review data fields to impls table for issue #2845:
+- review_image_description: AI's visual description of the plot
+- review_criteria_checklist: Detailed per-criterion scoring breakdown
+- review_verdict: "APPROVED" or "REJECTED"
+
+Revision ID: 6345896e2e90
+Revises: d0c76553a5cc
+Create Date: 2026-01-01
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision: str = "6345896e2e90"
+down_revision: Union[str, None] = "d0c76553a5cc"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add three review columns to the impls table; all are nullable, so existing rows simply hold NULL."""
+    # Free-form text: the AI reviewer's visual description of the rendered plot
+    op.add_column("impls", sa.Column("review_image_description", sa.Text(), nullable=True))
+
+    # JSONB (type from the PostgreSQL dialect): detailed per-criterion scoring breakdown
+    op.add_column("impls", sa.Column("review_criteria_checklist", postgresql.JSONB(), nullable=True))
+
+    # Short string (at most 20 characters): "APPROVED" or "REJECTED"
+    op.add_column("impls", sa.Column("review_verdict", sa.String(20), nullable=True))
+
+
+def downgrade() -> None:
+    """Drop the three review columns from the impls table, in reverse order of upgrade(); their data is discarded."""
+    op.drop_column("impls", "review_verdict")
+    op.drop_column("impls", "review_criteria_checklist")
+    op.drop_column("impls", "review_image_description")
diff --git a/app/src/components/FilterBar.tsx b/app/src/components/FilterBar.tsx
index 7144e08b1e..550a64709e 100644
--- a/app/src/components/FilterBar.tsx
+++ b/app/src/components/FilterBar.tsx
@@ -139,8 +139,7 @@ export function FilterBar({
   const handleValueSelect = useCallback(
     (category: FilterCategory, value: string) => {
       onAddFilter(category, value);
-      onTrackEvent('filter_add', { category, value });
-      // Track search if query was used
+      // Track search if query was used (filter changes tracked via pageview)
       if (searchQuery.trim()) {
         onTrackEvent('search', { query: searchQuery.trim(), category });
       }
@@ -169,39 +168,33 @@ export function FilterBar({
   const handleRemoveValue = useCallback(
     (value: string) => {
       if (activeGroupIndex !== null) {
-        const group = activeFilters[activeGroupIndex];
         onRemoveFilter(activeGroupIndex, value);
-        onTrackEvent('filter_remove', { category: group?.category || '', value });
       }
       setChipMenuAnchor(null);
       setActiveGroupIndex(null);
     },
-    [activeGroupIndex, activeFilters, onRemoveFilter, onTrackEvent]
+    [activeGroupIndex, onRemoveFilter]
   );
 
   // Remove entire group
   const handleRemoveGroup = useCallback(() => {
     if (activeGroupIndex !== null) {
-      const group = activeFilters[activeGroupIndex];
       onRemoveGroup(activeGroupIndex);
-      onTrackEvent('filter_remove_group', { category: group?.category || '' });
     }
     setChipMenuAnchor(null);
     setActiveGroupIndex(null);
-  }, [activeGroupIndex, activeFilters, onRemoveGroup, onTrackEvent]);
+  }, [activeGroupIndex, onRemoveGroup]);
 
   // Add value to existing group (OR)
   const handleAddValueToExistingGroup = useCallback(
     (value: string) => {
       if (activeGroupIndex !== null) {
-        const group = activeFilters[activeGroupIndex];
         onAddValueToGroup(activeGroupIndex, value);
-        onTrackEvent('filter_add_or', { category: group?.category || '', value });
       }
       setChipMenuAnchor(null);
       setActiveGroupIndex(null);
     },
-    [activeGroupIndex, activeFilters, onAddValueToGroup, onTrackEvent]
+    [activeGroupIndex, onAddValueToGroup]
   );
 
   // Memoize search results to avoid recalculating on every render
@@ -209,6 +202,28 @@ export function FilterBar({
     () => getSearchResults(filterCounts, activeFilters, searchQuery, selectedCategory),
     [filterCounts, activeFilters, searchQuery, selectedCategory]
   );
+
+  // Track searches with no results (debounced, to discover missing specs)
+  const lastTrackedQueryRef = useRef<string>('');
+  useEffect(() => {
+    const query = searchQuery.trim();
+    // Only track if: query >= 2 chars, no results, not already tracked this query
+    if (query.length >= 2 && searchResults.length === 0 && query !== lastTrackedQueryRef.current) {
+      const timer = setTimeout(() => {
+        onTrackEvent('search_no_results', { query });
+        lastTrackedQueryRef.current = query;
+      }, 500);
+      return () => clearTimeout(timer);
+    }
+  }, [searchQuery, searchResults.length, onTrackEvent]);
+
+  // Reset tracked query when dropdown closes
+  useEffect(() => {
+    if (!dropdownAnchor) {
+      lastTrackedQueryRef.current = '';
+    }
+  }, [dropdownAnchor]);
+
   // Only open if anchor is valid and in document
   const isDropdownOpen = Boolean(dropdownAnchor) && document.body.contains(dropdownAnchor);
   const hasQuery = searchQuery.trim().length > 0;
@@ -349,10 +364,7 @@ export function FilterBar({
             key={`${group.category}-${index}`}
             label={displayLabel}
             onClick={(e) => handleChipClick(e, index)}
-            onDelete={() => {
-              onRemoveGroup(index);
-              onTrackEvent('filter_remove_group', { category: group.category });
-            }}
+            onDelete={() => onRemoveGroup(index)}
             deleteIcon={<CloseIcon sx={{ fontSize: '1rem !important' }} />}
             sx={{
               fontFamily: '"MonoLisa", "MonoLisa Fallback", monospace',
diff --git a/app/src/components/FullscreenModal.tsx b/app/src/components/FullscreenModal.tsx
index 03c3065d0d..58840e3d2a 100644
--- a/app/src/components/FullscreenModal.tsx
+++ b/app/src/components/FullscreenModal.tsx
@@ -34,7 +34,7 @@ export function FullscreenModal({ image, selectedSpec, onClose, onTrackEvent }:
   const { copied, copyToClipboard, reset: resetCopied } = useCopyCode({
     onCopy: () => {
       const specId = selectedSpec || image?.spec_id;
-      onTrackEvent?.('copy_code', { spec: specId, library: image?.library, method: 'modal' });
+      onTrackEvent?.('copy_code', { spec: specId, library: image?.library, method: 'button' });
     },
   });
 
@@ -92,13 +92,13 @@ export function FullscreenModal({ image, selectedSpec, onClose, onTrackEvent }:
   // Track native copy events (Ctrl+C, Cmd+C)
   const handleNativeCopy = useCallback(() => {
     const specId = selectedSpec || image?.spec_id;
-    onTrackEvent?.('copy_code', { spec: specId, library: image?.library, method: 'native' });
+    onTrackEvent?.('copy_code', { spec: specId, library: image?.library, method: 'keyboard' });
   }, [onTrackEvent, selectedSpec, image?.library, image?.spec_id]);
 
   // Track contextmenu (right-click) - user may copy from context menu
   const handleContextMenu = useCallback(() => {
     const specId = selectedSpec || image?.spec_id;
-    onTrackEvent?.('copy_code', { spec: specId, library: image?.library, method: 'contextmenu' });
+    onTrackEvent?.('copy_code', { spec: specId, library: image?.library, method: 'keyboard' });
   }, [onTrackEvent, selectedSpec, image?.library, image?.spec_id]);
 
   // Download image via backend proxy
@@ -119,10 +119,8 @@ export function FullscreenModal({ image, selectedSpec, onClose, onTrackEvent }:
 
   const handleClose = useCallback(() => {
     setShowCode(false);
-    const specId = selectedSpec || image?.spec_id;
-    onTrackEvent?.('modal_close', { spec: specId, library: image?.library });
     onClose();
-  }, [onClose, onTrackEvent, selectedSpec, image?.library, image?.spec_id]);
+  }, [onClose]);
 
   return (
     <Modal
diff --git a/app/src/components/ImageCard.tsx b/app/src/components/ImageCard.tsx
index 823270b0fe..1260ef0339 100644
--- a/app/src/components/ImageCard.tsx
+++ b/app/src/components/ImageCard.tsx
@@ -89,7 +89,7 @@ export const ImageCard = memo(function ImageCard({
       if (code) {
         await navigator.clipboard.writeText(code);
         setCopyState('copied');
-        onTrackEvent?.('copy_code', { spec: image.spec_id, library: image.library });
+        onTrackEvent?.('copy_code', { spec: image.spec_id, library: image.library, method: 'card' });
         setTimeout(() => setCopyState('idle'), 2000);
       } else {
         setCopyState('idle');
@@ -214,9 +214,6 @@ export const ImageCard = memo(function ImageCard({
             onClick={(e) => {
               e.stopPropagation();
               onTooltipToggle(isSpecTooltipOpen ? null : specTooltipId);
-              if (!isSpecTooltipOpen) {
-                onTrackEvent?.('description_spec', { spec: image.spec_id });
-              }
             }}
             sx={{
               fontSize: labelFontSize,
@@ -287,9 +284,6 @@ export const ImageCard = memo(function ImageCard({
                 onClick={(e) => {
                   e.stopPropagation();
                   onTooltipToggle(isLibTooltipOpen ? null : libTooltipId);
-                  if (!isLibTooltipOpen) {
-                    onTrackEvent?.('description_lib', { library: image.library });
-                  }
                 }}
                 sx={{
                   fontSize: labelFontSize,
diff --git a/automation/scripts/backfill_review_metadata.py b/automation/scripts/backfill_review_metadata.py
new file mode 100755
index 0000000000..5af2177875
--- /dev/null
+++ b/automation/scripts/backfill_review_metadata.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python3
+"""
+Backfill extended review data in metadata YAML files from PR comments.
+
+This script searches through merged PRs to find AI Review comments and extracts:
+- image_description
+- criteria_checklist
+- verdict
+
+IMPORTANT: When multiple AI Review comments exist (repair attempts),
+always takes the LAST one (the one that led to merge).
+
+Usage:
+    python automation/scripts/backfill_review_metadata.py --dry-run
+    python automation/scripts/backfill_review_metadata.py --execute
+
+Requires: gh CLI authenticated, PyYAML installed.
+"""
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+import yaml
+
+
+def run_gh_command(args: list[str]) -> dict | list | str | None:
+    """Run `gh <args>`; return parsed JSON, raw stdout if it is not JSON, or None on failure."""
+    result = subprocess.run(["gh", *args], capture_output=True, text=True, check=False)
+    if result.returncode != 0:
+        print(f"gh {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
+        return None  # callers treat None as "no data"
+    try:
+        return json.loads(result.stdout)
+    except json.JSONDecodeError:
+        return result.stdout.strip()
+
+
+def find_merged_pr_for_implementation(spec_id: str, library: str) -> dict | None:
+    """Return the merged PR (gh JSON dict) for spec_id/library, or None if none is found."""
+    # Primary lookup: exact match on the implementation branch name.
+    branch_pattern = f"implementation/{spec_id}/{library}"
+
+    prs = run_gh_command([
+        "pr", "list",
+        "--state", "merged",
+        "--head", branch_pattern,
+        "--json", "number,headRefName,mergedAt,comments",
+        "--limit", "1"
+    ])
+    # run_gh_command may return None or a plain string; only a non-empty JSON list is usable.
+    if isinstance(prs, list) and prs:
+        return prs[0]
+
+    # Fallback: search by the PR title convention, then confirm via the branch name.
+    prs = run_gh_command([
+        "pr", "list",
+        "--state", "merged",
+        "--search", f"feat({library}): implement {spec_id}",
+        "--json", "number,headRefName,mergedAt",
+        "--limit", "5"
+    ])
+
+    for pr in prs if isinstance(prs, list) else []:
+        head_ref = pr.get("headRefName") or ""
+        if spec_id in head_ref and library in head_ref:
+            return pr
+
+    return None
+
+
+def get_pr_comments(pr_number: int) -> list[dict]:
+    """Return all comments of PR `pr_number` via gh; [] when the lookup fails or is not a JSON list."""
+    comments = run_gh_command([
+        "pr", "view", str(pr_number),
+        "--json", "comments",
+        "-q", ".comments"
+    ])
+    return comments if isinstance(comments, list) else []
+
+
+def parse_ai_review_comment(comment_body: str) -> dict | None:
+    """
+    Parse an AI Review comment to extract structured data.
+
+    Returns None if the text has no "## AI Review" marker, else a dict with:
+        - image_description: str | None (quote markers removed)
+        - criteria_checklist: dict | None
+        - verdict: "APPROVED" | "REJECTED" | None
+        - strengths: list[str]
+        - weaknesses: list[str]
+    """
+    if "## AI Review" not in comment_body:
+        return None
+
+    # Strengths and Weaknesses share one format, so one helper parses both.
+    def bullet_list(heading: str) -> list[str]:
+        """Return the "-"/"*" bullets directly under `### <heading>` ([] if absent)."""
+        # [ \t]+ rather than \s+: \s also matches newlines and could join two lines.
+        found = re.search(rf"### {heading}\s*\n((?:[-*][ \t]+.*\n?)+)", comment_body)
+        if not found:
+            return []
+        rows = found.group(1).strip().split("\n")
+        return [re.sub(r"^[-*]\s+", "", row).strip() for row in rows if row.strip()]
+
+    # Bullet sections are parsed right away; the other fields are filled in below.
+    result = {
+        "image_description": None,
+        "criteria_checklist": None,
+        "verdict": None,
+        "strengths": bullet_list("Strengths"),
+        "weaknesses": bullet_list("Weaknesses"),
+    }
+
+    # Extract Image Description (multi-line "> " quote block).
+    # [ \t]* rather than \s*: \s would swallow the newline after a bare ">" line
+    # and pull the following unquoted line into the description.
+    img_desc_match = re.search(
+        r"### Image Description\s*\n((?:>[ \t]*.*\n?)+)",
+        comment_body,
+    )
+    if img_desc_match:
+        # Remove leading > and whitespace from each line
+        lines = img_desc_match.group(1).strip().split("\n")
+        cleaned_lines = [re.sub(r"^>\s*", "", line) for line in lines]
+        result["image_description"] = "\n".join(cleaned_lines).strip()
+
+    # Extract Verdict (matched case-insensitively, stored upper-case)
+    verdict_match = re.search(r"### Verdict:\s*(APPROVED|REJECTED)", comment_body, re.IGNORECASE)
+    if verdict_match:
+        result["verdict"] = verdict_match.group(1).upper()
+
+    # Extract Criteria Checklist (None when no category header is present)
+    result["criteria_checklist"] = parse_criteria_checklist(comment_body)
+
+    return result
+
+
+def parse_criteria_checklist(comment_body: str) -> dict | None:
+    """
+    Parse the criteria checklist from the AI Review comment.
+
+    Format in comment:
+    **Visual Quality (36/40 pts)**
+    - [x] VQ-01: Text Legibility (10) - All text readable ✓
+    - [ ] VQ-02: No Overlap (0/8) - Some elements overlap
+
+    Returns:
+        Dict keyed by category (visual_quality, spec_compliance, data_quality,
+        code_quality, library_features; only those found in the comment). Each
+        value is {"score": int, "max": int, "items": [...]}, and every item has
+        the keys id, name, score, max, passed and comment. None is returned if
+        no category header is found.
+    """
+    checklist = {}
+
+    # Category header patterns; groups 1 and 2 capture the score and the maximum
+    categories = {
+        "visual_quality": r"\*\*Visual Quality \((\d+)/(\d+) pts?\)\*\*",
+        "spec_compliance": r"\*\*Spec Compliance \((\d+)/(\d+) pts?\)\*\*",
+        "data_quality": r"\*\*Data Quality \((\d+)/(\d+) pts?\)\*\*",
+        "code_quality": r"\*\*Code Quality \((\d+)/(\d+) pts?\)\*\*",
+        "library_features": r"\*\*Library Features \((\d+)/(\d+) pts?\)\*\*",
+    }
+
+    # Item pattern: - [x] VQ-01: Name (score) - comment
+    # or: - [ ] VQ-01: Name (score/max) - comment
+    # Only [ \t] (never \s) separates the tokens, so an item without a comment
+    # cannot swallow the following checklist line as its comment.
+    item_pattern = re.compile(
+        r"- \[([ xX])\] ([A-Z]{2}-\d+): ([^(\n]+)\((\d+)(?:/(\d+))?\)[ \t]*[-–]?[ \t]*(.*)"
+    )
+
+    for cat_key, cat_pattern in categories.items():
+        cat_match = re.search(cat_pattern, comment_body)
+        if not cat_match:
+            continue
+
+        # The section runs from this header to the nearest following category
+        # header or, failing that, to the next "### " heading or the end of text.
+        rest = comment_body[cat_match.end():]
+        later_headers = [
+            found.start()
+            for key, pattern in categories.items()
+            if key != cat_key and (found := re.search(pattern, rest))
+        ]
+        if later_headers:
+            # Offset 0 is a valid boundary, so test the list, not the offset itself
+            section = rest[:min(later_headers)]
+        else:
+            next_section = re.search(r"\n###\s", rest)
+            section = rest[:next_section.start()] if next_section else rest
+
+        # One entry per checklist line found inside the section
+        items = []
+        for match in item_pattern.finditer(section):
+            checked, item_id, item_name, score_text, max_text, item_comment = match.groups()
+            item_score = int(score_text)
+            # Without an explicit positive "/max", the awarded score doubles as the max
+            item_max = int(max_text) if max_text and int(max_text) > 0 else item_score
+
+            items.append({
+                "id": item_id,
+                "name": item_name.strip(),
+                "score": item_score,
+                "max": item_max,
+                "passed": checked.lower() == "x",
+                # Drop a trailing check/cross mark from the comment
+                "comment": re.sub(r"\s*[✓✗✔✘]$", "", item_comment.strip()),
+            })
+
+        checklist[cat_key] = {
+            "score": int(cat_match.group(1)),
+            "max": int(cat_match.group(2)),
+            "items": items,
+        }
+
+    return checklist if checklist else None
+
+
+def update_metadata_file(metadata_path: Path, review_data: dict, dry_run: bool) -> bool:
+    """
+    Update a metadata YAML file with extended review data.
+
+    Preserves existing fields, only adds/updates review section.
+    In dry-run mode the planned changes are only printed; nothing is written.
+    Returns False when the file does not exist, True otherwise (also in dry-run).
+    """
+    import yaml
+
+    if not metadata_path.exists():
+        print(f"  ⚠️  Metadata file not found: {metadata_path}")
+        return False
+
+    with open(metadata_path, encoding="utf-8") as f:
+        data = yaml.safe_load(f) or {}  # an empty file loads as None
+
+    # A bare `review:` key loads as None; normalise it to a dict before writing into it
+    if not isinstance(data.get("review"), dict):
+        data["review"] = {}
+    review = data["review"]
+
+    # Extended fields: overwrite with any non-empty value from the PR comment
+    for key in ("image_description", "criteria_checklist", "verdict"):
+        if review_data.get(key):
+            review[key] = review_data[key]
+
+    # Strengths/weaknesses: fill in only when the metadata has none yet
+    for key in ("strengths", "weaknesses"):
+        if review_data.get(key) and not review.get(key):
+            review[key] = review_data[key]
+
+    if dry_run:
+        print(f"  📝 Would update: {metadata_path}")
+        if review_data.get("image_description"):
+            print(f"      - image_description: {len(review_data['image_description'])} chars")
+        if review_data.get("criteria_checklist"):
+            print(f"      - criteria_checklist: {len(review_data['criteria_checklist'])} categories")
+        if review_data.get("verdict"):
+            print(f"      - verdict: {review_data['verdict']}")
+        return True
+
+    # Custom representer: literal block style for multi-line text, single quotes
+    def str_representer(dumper, data):
+        if isinstance(data, str) and "\n" in data:
+            return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+        if isinstance(data, str) and data.endswith("Z") and "T" in data:
+            return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="'")
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+    yaml.add_representer(str, str_representer)
+
+    with open(metadata_path, "w", encoding="utf-8") as f:
+        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
+
+    print(f"  ✅ Updated: {metadata_path}")
+    return True
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Backfill extended review data from PR comments")
+    parser.add_argument("--dry-run", action="store_true", help="Preview changes without modifying files")
+    parser.add_argument("--execute", action="store_true", help="Actually modify files")
+    parser.add_argument("--spec-id", help="Process only this spec ID")
+    parser.add_argument("--library", help="Process only this library")
+    args = parser.parse_args()
+
+    if not args.dry_run and not args.execute:
+        print("Error: Must specify --dry-run or --execute")
+        sys.exit(1)
+
+    dry_run = args.dry_run
+
+    # Find all metadata files
+    plots_dir = Path("plots")
+    if not plots_dir.exists():
+        print("Error: plots/ directory not found. Run from repository root.")
+        sys.exit(1)
+
+    metadata_files = list(plots_dir.glob("*/metadata/*.yaml"))
+    print(f"Found {len(metadata_files)} metadata files")
+
+    # Filter if spec-id or library specified
+    if args.spec_id:
+        metadata_files = [f for f in metadata_files if args.spec_id in str(f)]
+    if args.library:
+        metadata_files = [f for f in metadata_files if f.stem == args.library]
+
+    print(f"Processing {len(metadata_files)} files...")
+
+    updated = 0
+    skipped = 0
+    errors = 0
+
+    for metadata_file in sorted(metadata_files):
+        # Extract spec_id and library from path
+        # Path: plots/{spec-id}/metadata/{library}.yaml
+        spec_id = metadata_file.parent.parent.name
+        library = metadata_file.stem
+
+        print(f"\n📦 {spec_id}/{library}")
+
+        # Check if already has extended review data
+        try:
+            with open(metadata_file) as f:
+                existing_data = yaml.safe_load(f)
+            existing_review = existing_data.get("review", {}) if existing_data else {}
+            if existing_review.get("image_description") and existing_review.get("criteria_checklist"):
+                print(f"  ✓ Already has extended review data, skipping")
+                skipped += 1
+                continue
+        except Exception:
+            pass  # Continue with backfill if we can't read
+
+        # Find the merged PR
+        pr = find_merged_pr_for_implementation(spec_id, library)
+        if not pr:
+            print(f"  ⏭️  No merged PR found")
+            skipped += 1
+            continue
+
+        pr_number = pr["number"]
+        print(f"  🔗 Found PR #{pr_number}")
+
+        # Get all comments
+        comments = get_pr_comments(pr_number)
+        if not comments:
+            print(f"  ⏭️  No comments found")
+            skipped += 1
+            continue
+
+        # Filter for AI Review comments
+        review_comments = [c for c in comments if "## AI Review" in c.get("body", "")]
+        if not review_comments:
+            print(f"  ⏭️  No AI Review comments found")
+            skipped += 1
+            continue
+
+        print(f"  📝 Found {len(review_comments)} AI Review comment(s)")
+
+        # Take the LAST AI Review comment (the one that led to merge)
+        # Sort by createdAt and take the last one
+        review_comments.sort(key=lambda c: c.get("createdAt", ""))
+        final_review = review_comments[-1]
+
+        # Parse the review
+        review_data = parse_ai_review_comment(final_review["body"])
+        if not review_data:
+            print(f"  ⚠️  Failed to parse review comment")
+            errors += 1
+            continue
+
+        # Check if there's anything new to add
+        has_new_data = (
+            review_data.get("image_description") or
+            review_data.get("criteria_checklist") or
+            review_data.get("verdict")
+        )
+
+        if not has_new_data:
+            print(f"  ⏭️  No extended data found in review")
+            skipped += 1
+            continue
+
+        # Update the metadata file
+        if update_metadata_file(metadata_file, review_data, dry_run):
+            updated += 1
+        else:
+            errors += 1
+
+    print(f"\n{'=' * 50}")
+    print(f"Summary: {updated} updated, {skipped} skipped, {errors} errors")
+
+    if dry_run:
+        print("\n⚠️  DRY RUN - no files were modified")
+        print("Run with --execute to apply changes")
+
+
+if __name__ == "__main__":  # run the backfill only when executed as a script, not on import
+    main()
diff --git a/automation/scripts/sync_to_postgres.py b/automation/scripts/sync_to_postgres.py
index 95192ab777..c8da3a6bb6 100644
--- a/automation/scripts/sync_to_postgres.py
+++ b/automation/scripts/sync_to_postgres.py
@@ -284,6 +284,10 @@ def scan_plot_directory(plot_dir: Path) -> dict | None:
                     # Review feedback
                     "review_strengths": review.get("strengths") or [],
                     "review_weaknesses": review.get("weaknesses") or [],
+                    # Extended review data (issue #2845)
+                    "review_image_description": review.get("image_description"),
+                    "review_criteria_checklist": review.get("criteria_checklist"),
+                    "review_verdict": review.get("verdict"),
                 }
             )
 
diff --git a/core/database/models.py b/core/database/models.py
index 61b40533db..475d5aeb57 100644
--- a/core/database/models.py
+++ b/core/database/models.py
@@ -100,6 +100,11 @@ class Impl(Base):
     review_strengths: Mapped[list[str]] = mapped_column(StringArray, default=list)  # What's good
     review_weaknesses: Mapped[list[str]] = mapped_column(StringArray, default=list)  # What needs work
 
+    # Extended review data (from issue #2845)
+    review_image_description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)  # AI's visual description
+    review_criteria_checklist: Mapped[Optional[dict]] = mapped_column(UniversalJSON, nullable=True)  # Detailed scoring
+    review_verdict: Mapped[Optional[str]] = mapped_column(String(20), nullable=True)  # "APPROVED" or "REJECTED"
+
     # System
     updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), onupdate=func.now())
 
diff --git a/docs/architecture/repository.md b/docs/architecture/repository.md
index c63a9ba4b5..87e737ef3f 100644
--- a/docs/architecture/repository.md
+++ b/docs/architecture/repository.md
@@ -240,14 +240,63 @@ quality_score: 92
 
 # Review feedback (used by AI for regeneration)
 review:
-  strengths: ["Clean code structure"]
-  weaknesses: ["Grid could be more subtle"]
+  # AI's visual description of the generated plot
+  image_description: |
+    The plot shows a scatter plot with 100 data points displaying
+    a positive correlation. Points are rendered in blue with 70%
+    opacity. Axes are clearly labeled and a subtle grid is visible.
+
+  # Detailed scoring breakdown by category
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+        - id: VQ-01
+          name: Text Legibility
+          score: 10
+          max: 10
+          passed: true
+          comment: "All text readable at full size"
+        - id: VQ-02
+          name: No Overlap
+          score: 8
+          max: 8
+          passed: true
+          comment: "No element overlap detected"
+    spec_compliance:
+      score: 23
+      max: 25
+      items: [...]
+    data_quality:
+      score: 18
+      max: 20
+      items: [...]
+    code_quality:
+      score: 10
+      max: 10
+      items: [...]
+    library_features:
+      score: 5
+      max: 5
+      items: [...]
+
+  # Final verdict
+  verdict: APPROVED
+
+  # Summary feedback
+  strengths:
+    - "Clean code structure"
+    - "Good use of alpha for overlapping points"
+  weaknesses:
+    - "Grid could be more subtle"
 ```
 
 **Key Points**:
 - Each library has its own file (no merge conflicts!)
 - Created by `impl-generate.yml`, updated by `impl-review.yml`
 - Review feedback persisted for AI to improve on regeneration
+- Extended review data (`image_description`, `criteria_checklist`, `verdict`) records what the reviewer saw and which criteria failed, so regeneration can target specific fixes
 
 ### GCS Storage
 
diff --git a/plots/alluvial-basic/metadata/altair.yaml b/plots/alluvial-basic/metadata/altair.yaml
index d18a641d0f..990266e50e 100644
--- a/plots/alluvial-basic/metadata/altair.yaml
+++ b/plots/alluvial-basic/metadata/altair.yaml
@@ -30,3 +30,185 @@ review:
   weaknesses:
   - Node labels are abbreviated (Con, Lib, Prog, Ind) which slightly reduces immediate
     comprehension; full names would be preferable if space permits
+  image_description: 'The plot displays an alluvial diagram visualizing voter migration
+    between political parties across 4 U.S. election cycles (2012, 2016, 2020, 2024).
+    Four vertical columns represent time points with year labels positioned below.
+    Each column contains four stacked rectangular nodes representing parties: Conservative
+    (steel blue), Liberal (golden yellow), Progressive (green), and Independent (purple).
+    Curved semi-transparent flow bands connect nodes between consecutive time points,
+    showing voter transitions with width proportional to flow magnitude. Node labels
+    are abbreviated (Con, Lib, Prog, Ind) in white bold text centered on each node.
+    A legend on the right side identifies the party colors. The title "alluvial-basic
+    · altair · pyplots.ai" appears centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, year labels, and legend are clearly readable. Node labels
+          are readable but slightly small due to abbreviation.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; flows use transparency to handle visual
+          overlap well
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and flows are well-sized; flow transparency at 0.5 works well
+          for showing crossings
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, purple palette is colorblind-safe with good
+          differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned appropriately
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (expected for alluvial diagrams, but year labels serve
+          as column headers)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean background, legend well-styled with border and appropriate
+          sizing
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial diagram with vertical time ordering and flow bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, categories as nodes, flows showing transitions
+          correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has time point headers, category labels, proportional band widths,
+          transparency for overlaps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the canvas
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four parties
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Title format is correct "alluvial-basic · altair · pyplots.ai" ✓
+          (restoring 2 points)
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows multiple flow types: party retention (large flows staying
+          within category), party switching (cross-flows), and varying proportions
+          over time'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Election voter migration is a highly realistic scenario mentioned
+          in the spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in hundreds/thousands of voters are plausible; transitions
+          show realistic patterns (most voters stay, some switch)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally flat structure but uses multiple loops for data transformation
+          which is acceptable for this complex chart type
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded flow values), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as both plot.png and plot.html correctly ✓ (restoring 1 point)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair features: layered chart composition, mark_line
+          with filled polygons, mark_rect for nodes, mark_text for labels, declarative
+          encoding with detail and order for proper polygon rendering, tooltips on
+          nodes, interactive() for zoom/pan, proper scale configuration'
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/bokeh.yaml b/plots/alluvial-basic/metadata/bokeh.yaml
index dfc408b2d7..76f2add140 100644
--- a/plots/alluvial-basic/metadata/bokeh.yaml
+++ b/plots/alluvial-basic/metadata/bokeh.yaml
@@ -25,3 +25,184 @@ review:
   - Legend is positioned far right creating slight visual imbalance
   - Node labels only shown for first and last time points; middle time points could
     benefit from value annotations
+  image_description: |-
+    The plot displays a basic alluvial diagram showing voter migration between four political parties (Democratic, Republican, Independent, Other) across four election years (2012, 2016, 2020, 2024). The visualization uses:
+    - **Colors**: Blue (#306998) for Democratic, Red (#D62728) for Republican, Yellow (#FFD43B) for Independent, Gray (#7F7F7F) for Other
+    - **Title**: "alluvial-basic · bokeh · pyplots.ai" centered at top
+    - **Subtitle**: "Voter Migration Between Parties (values in millions)" in gray below the title
+    - **Layout**: Four vertical time columns with rectangular nodes representing party sizes, connected by smooth Bezier curve bands showing flow transitions
+    - **Labels**: Left side shows starting values (Democratic 42M, Republican 37M, Independent 15M, Other 6M), right side shows ending values (Democratic 48M, Republican 40M, Independent 13M, Other 2M)
+    - **Time labels**: Bold year labels (2012, 2016, 2020, 2024) at the bottom
+    - **Legend**: Located on the right side showing all four party colors
+    - **Flow bands**: Semi-transparent (alpha 0.5) with color matching the source category, clearly showing transitions between parties
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title is 32pt, labels are 20-24pt. Slightly
+          smaller than ideal for some elements but all clearly legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; labels are well-positioned on left
+          and right sides.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and flow bands are clearly visible with good sizing. Transparency
+          (0.5) works well for overlapping flows.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Red/Yellow/Gray palette is colorblind-friendly; good contrast
+          between categories.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot is well-centered with appropriate
+          margins. Legend placement is functional but creates some asymmetry.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Time points clearly labeled; category labels include values with
+          units (M for millions).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for alluvial), legend is well-styled with good
+          font size and background.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial/Sankey-style diagram with vertical ordering by time.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points on X-axis, categories vertically stacked, flow widths
+          proportional to values.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: time points, categories, values, flows
+          with proper proportions.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; y-range properly calculated to show all nodes.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four party categories with matching
+          colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "alluvial-basic · bokeh · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bidirectional flows, party retention, and cross-party migration.
+          Could show more dramatic shifts to highlight the visualization's capabilities.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Election voter migration is a plausible scenario; values are in reasonable
+          range but slightly simplified.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in millions of voters are realistic for national elections;
+          proportions are sensible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure; no functions or classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data.
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Has one unused import: Label is used but some imports like ColumnDataSource
+          mentioned in library rules aren''t used (though not imported). Minor: could
+          be slightly cleaner.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's patch() for flow bands and quad() for nodes effectively.
+          Creates custom Bezier curves manually. Could leverage more Bokeh-specific
+          features like HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/highcharts.yaml b/plots/alluvial-basic/metadata/highcharts.yaml
index 7957547f21..850e243d4f 100644
--- a/plots/alluvial-basic/metadata/highcharts.yaml
+++ b/plots/alluvial-basic/metadata/highcharts.yaml
@@ -24,3 +24,175 @@ review:
   - Title includes extra text before required format; should be just spec-id · library
     · pyplots.ai
   - Subtitle font size could be slightly larger for better readability at 4800x2700
+  image_description: 'The plot displays an alluvial diagram visualizing voter migration
+    between three political parties (Conservative, Moderate, Progressive) across three
+    election cycles (2016, 2020, 2024). The diagram uses a colorblind-safe palette:
+    blue (#306998) for Conservative, purple (#9467BD) for Moderate, and yellow (#FFD43B)
+    for Progressive. Nodes are arranged in three vertical columns representing each
+    year, with curved bands connecting nodes to show voter flow transitions. The title
+    "Voter Migration · alluvial-basic · highcharts · pyplots.ai" appears at the top
+    with a descriptive subtitle. Year labels (2016, 2020, 2024) appear at the bottom
+    of each column, and party names label each node. A horizontal legend at the bottom
+    identifies the three party colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and legend are clearly readable; subtitle slightly
+          small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all node labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes and flows are appropriately sized; transparency (0.4) helps
+          with overlapping flows
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette using blue, purple, yellow (no red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight imbalance with 2024 labels close
+          to edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for alluvial; year labels serve as axis labels (acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom; no distracting grid elements
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial/sankey diagram with strict vertical ordering
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, parties as categories, flows as band widths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: time ordering, consistent colors, proportional
+          bands'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; flows proportional to values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three parties
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Contains spec-id, library, pyplots.ai but includes extra "Voter Migration"
+          prefix
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows transitions in all directions: retention, gain, and loss between
+          parties'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Voter migration is a perfect application; data patterns are plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Thousands of voters is appropriate scale; values are realistic
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Data is deterministic but no explicit seed comment; flows data is
+          hardcoded
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highcharts sankey module with column positioning, annotations,
+          custom legend HTML, and interactive tooltips
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/letsplot.yaml b/plots/alluvial-basic/metadata/letsplot.yaml
index 2ab649857b..e1a36f941b 100644
--- a/plots/alluvial-basic/metadata/letsplot.yaml
+++ b/plots/alluvial-basic/metadata/letsplot.yaml
@@ -27,3 +27,183 @@ review:
     KISS flat script style
   - Yellow (Independents) could be more distinguishable from gray (Non-Voters) in
     certain viewing conditions
+  image_description: The plot displays an alluvial diagram showing voter migration
+    between four political affiliations (Democrats, Republicans, Independents, Non-Voters)
+    across three US election cycles (2016, 2020, 2024). Three vertical columns represent
+    each election year, with colored rectangular nodes indicating party/voter status.
+    Blue represents Democrats (bottom), red for Republicans, yellow for Independents,
+    and gray for Non-Voters (top). Curved bands flow between nodes showing voter transitions
+    - flows are colored by destination party with ~55% transparency. The left side
+    shows party labels, while the right side shows party names with final 2024 totals
+    in millions (Democrats 85M, Republicans 78M, Independents 10M, Non-Voters 58M).
+    Title follows the required format at top-left.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, year headers, and party labels are clearly readable. Font
+          sizes are appropriate for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Labels are well-positioned outside
+          the plot area.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and flows are clearly visible. Flow bands have good alpha (0.55)
+          for overlap handling. Some minor flow crossings are hard to trace.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, red, yellow, gray palette is distinguishable. Red-blue distinction
+          works for most colorblind types, though red-green sensitive users may have
+          some difficulty with yellow-gray distinction.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, balanced whitespace.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for alluvial diagrams; party labels and year headers serve this
+          purpose and are descriptive with context (values in millions).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend shown (legend_position="none"), but colors are identifiable
+          via labels. Grid appropriately hidden for this plot type.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial diagram with vertical time ordering and flow bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, categories as nodes, flow magnitudes as band
+          widths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows transitions across 3 time points, 4 categories, proportional
+          band widths
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, flows span full range between nodes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Direct labeling replaces legend effectively
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "alluvial-basic · letsplot · pyplots.ai" format correctly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple flow directions (retention, migration between parties,
+          mobilization from non-voters). Could show more dramatic shifts for variety.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US election voter migration is a perfect, comprehensible real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in millions are plausible for US electorate. Some totals seem
+          reasonable though exact numbers are illustrative.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses helper functions (calculate_node_positions, add_flows) which
+          slightly deviates from pure KISS style, but keeps code organized
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random seed needed (all values hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct scaling
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_polygon for custom shapes, scale_fill_manual,
+          theme customization. However, lets-plot does not have a native alluvial
+          geom, so manual polygon construction was necessary. Good use of ggsize and
+          ggsave with scale parameter.
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/matplotlib.yaml b/plots/alluvial-basic/metadata/matplotlib.yaml
index 854ca8c135..2b762bafc1 100644
--- a/plots/alluvial-basic/metadata/matplotlib.yaml
+++ b/plots/alluvial-basic/metadata/matplotlib.yaml
@@ -24,3 +24,175 @@ review:
   weaknesses:
   - Legend uses Independent but nodes use Indep abbreviation - should be consistent
   - Node labels could be slightly larger for the smallest nodes
+  image_description: 'The plot shows a well-designed alluvial diagram visualizing
+    voter migration across 4 election cycles (2012, 2016, 2020, 2024). Four political
+    categories are displayed as stacked rectangular nodes at each time point: Party
+    A (dark blue, #306998), Party B (yellow, #FFD43B), Party C (teal, #4ECDC4), and
+    Independent (gray, #95A5A6). Semi-transparent curved flow bands connect the nodes
+    between consecutive time points, showing how voters migrate between parties. Each
+    node is labeled with the party name and voter count in thousands. The title follows
+    the required format with spec-id, library, and pyplots.ai. A legend in the lower
+    left identifies the four categories. The layout is clean with good use of whitespace
+    and the flows are visually clear.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, year labels, node labels all readable; node labels slightly
+          small for smallest nodes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flow bands well-sized with appropriate alpha (0.4); nodes clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are colorblind-friendly (blue, yellow, teal, gray)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for alluvial diagrams (no traditional axes), but time points
+          labeled clearly
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well placed in lower left, no grid needed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial diagram with vertical ordering and flow bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, categories as stacked nodes, flows proportional
+          to magnitude
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: multiple time points, consistent colors,
+          proportional band widths, transparency for flows'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the chart bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend labels match categories but "Indep." abbreviation in nodes
+          differs from "Independent" in legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{context} · alluvial-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple transitions, persistence within parties, and cross-party
+          migrations; could show more dramatic shifts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Voter migration is a perfect real-world scenario for alluvial diagrams
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in thousands of voters are realistic and sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of matplotlib.patches (PathPatch, Rectangle) and Path for
+          custom bezier curves; demonstrates matplotlib's flexibility for custom visualizations
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/plotly.yaml b/plots/alluvial-basic/metadata/plotly.yaml
index 4ad5567b83..fa368280c5 100644
--- a/plots/alluvial-basic/metadata/plotly.yaml
+++ b/plots/alluvial-basic/metadata/plotly.yaml
@@ -27,3 +27,177 @@ review:
     could be increased for better readability
   - The y-position ordering places Conservative at top but the visual shows it with
     largest bar - consider if ordering by size would improve readability
+  image_description: 'The plot displays a Sankey/alluvial diagram showing voter migration
+    between four political parties (Conservative, Liberal, Progressive, Independent)
+    across four US election cycles (2012, 2016, 2020, 2024). The diagram has a white
+    background with year labels at the top of each column in bold. Each party is represented
+    by a distinct color: Conservative (blue #306998), Liberal (yellow #FFD43B), Progressive
+    (green #2CA02C), and Independent (purple #9467BD). Vertical bars at each time
+    point show the relative size of each party''s voter base, with semi-transparent
+    flow bands connecting them to show voter migration patterns. The Conservative
+    party (blue) has the largest node at each time point, positioned at the top. The
+    flows show mostly stable voter retention with some cross-party movement. A horizontal
+    legend at the bottom identifies each party color. The title "Voter Migration ·
+    alluvial-basic · plotly · pyplots.ai" is centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and year labels are very readable; node labels on bars are
+          slightly small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes and flow bands are appropriately sized and visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that are colorblind-friendly (blue, yellow,
+          green, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills most of the area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sankey/alluvial diagrams (no axes), but year labels serve
+          similar purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean horizontal legend, no grid needed for this chart type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial/Sankey diagram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, categories as nodes, flows as connections
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: time ordering, consistent colors, proportional
+          band widths, transparency for flows'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 4 time points and 4 categories fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four parties
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{spec-id} · {library} · pyplots.ai" format correctly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows voter retention AND migration between parties, demonstrates
+          the alluvial concept well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US voter migration is a perfect real-world use case for alluvial
+          diagrams
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Voter counts in hundreds are reasonable for a simplified model
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), but no explicit seed statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png AND plot.html (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Plotly's go.Sankey with custom hover templates,
+          node positioning, and interactive HTML export
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/plotnine.yaml b/plots/alluvial-basic/metadata/plotnine.yaml
index 63f1d296b4..9752273069 100644
--- a/plots/alluvial-basic/metadata/plotnine.yaml
+++ b/plots/alluvial-basic/metadata/plotnine.yaml
@@ -26,3 +26,165 @@ review:
   - Data is deterministic so no seed needed, but explicit data definition could use
     a comment explaining the voter transition matrix
   - Small flows between 2016-2018 are less visible compared to later periods
+  image_description: The plot displays an alluvial diagram showing voter migration
+    between three political parties (Democrats in blue, Republicans in red/coral,
+    Independent in yellow) across four election cycles (2016, 2018, 2020, 2022). Vertical
+    rectangular nodes represent party affiliations at each time point, with smooth
+    curved flow bands connecting them to visualize voter transitions. White voter
+    count labels (45, 43, 41, 14, etc.) appear inside nodes. Party names are displayed
+    on both left and right edges. Year labels are positioned at the bottom. A legend
+    on the right side identifies the three party colors.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and bold, year labels are clear, node counts visible,
+          party labels readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flow bands visible with good alpha transparency, nodes clearly defined
+          with white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, red, and yellow are colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills ~60% of canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (but appropriate for this plot type with custom annotations)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean minimal theme with no grid (appropriate), well-placed legend
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial diagram with vertical time ordering
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, parties as categories, voter counts as values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Flows show transitions, band width proportional to flow magnitude,
+          transparency for overlapping flows
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within the visualization
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows "Party" but the title context is voter migration
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "{topic} · alluvial-basic · plotnine · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows party retention (large same-party flows) and migration (smaller
+          cross-party flows), but some small flows filtered out
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Election data with years as time points, parties as categories -
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Voter counts in reasonable ranges (~14-45 per party per year), realistic
+          proportions
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (data is deterministic, so this is minor)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/pygal.yaml b/plots/alluvial-basic/metadata/pygal.yaml
index 51f1637f17..f61a7deb72 100644
--- a/plots/alluvial-basic/metadata/pygal.yaml
+++ b/plots/alluvial-basic/metadata/pygal.yaml
@@ -23,3 +23,172 @@ review:
   - Subtitle font could be slightly larger for better readability
   - Title uses spaces around middot which is acceptable but slightly different from
     spec
+  image_description: The plot shows an alluvial diagram titled "alluvial-basic · pygal
+    · pyplots.ai" displaying voter migration between political parties across four
+    election cycles (2012, 2016, 2020, 2024). Four vertical bars represent each time
+    point, with stacked segments for Democratic (blue), Republican (red), Independent
+    (yellow), and Other (gray) parties. Semi-transparent curved bands connect the
+    bars showing voter flow between parties. Party labels appear on both left and
+    right sides in matching colors. Year labels appear below each column. A subtitle
+    at the bottom reads "Voter Migration Between Political Parties (Millions of Voters)".
+    The color scheme is colorblind-safe with good contrast.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are readable, subtitle could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments and flow bands are well sized and visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with blue, red, yellow, gray
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas but slight extra whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No traditional axes (appropriate for alluvial), subtitle serves as
+          description
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Party labels on sides serve as legend, no distracting grid
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial diagram with time-ordered columns and flow bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, categories as stacked segments, flows as
+          bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: time ordering, consistent colors, proportional
+          bands, transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All four time points and all categories visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Party labels correctly colored and positioned
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format "alluvial-basic · pygal · pyplots.ai" but uses spaces
+          around middot
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple flows including party retention and crossover migration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US election data with plausible voter migration scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic voter counts in millions matching actual election magnitudes
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally flat structure but has some complexity due to manual SVG
+          generation
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses pygal as base
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart as canvas and custom SVG injection for alluvial
+          elements; creative workaround since pygal lacks native alluvial support
+  verdict: APPROVED
diff --git a/plots/alluvial-basic/metadata/seaborn.yaml b/plots/alluvial-basic/metadata/seaborn.yaml
index 891733586f..a8c8f2f307 100644
--- a/plots/alluvial-basic/metadata/seaborn.yaml
+++ b/plots/alluvial-basic/metadata/seaborn.yaml
@@ -24,3 +24,176 @@ review:
   - No formal legend box (inline labels work but a legend would be cleaner)
   - Limited use of seaborn core plotting functions since alluvial diagrams require
     custom matplotlib patches
+  image_description: 'The plot displays an alluvial diagram showing US voter migration
+    across 4 election cycles (2012, 2016, 2020, 2024). Four vertical stacked bars
+    represent each year, with segments colored by party: Democratic (blue), Republican
+    (orange), Independent (green), and Other (gray). The colorblind-safe palette from
+    seaborn is used. Curved flow bands connect the bars showing voter transitions
+    between parties, with band widths proportional to flow magnitude. The title follows
+    the required format "alluvial-basic · seaborn · pyplots.ai" at the top. Labels
+    on the left (2012) and right (2024) edges show party names with voter counts in
+    millions. Each year column header shows the year and total voters. An italicized
+    subtitle at the bottom explains the data context.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable, good font sizes, labels clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels positioned outside bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flow bands visible with appropriate alpha, smaller flows use higher
+          alpha for visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for alluvial (no traditional axes), but year headers and party
+          labels are descriptive with units (M for millions)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No formal legend, though colors are labeled directly on bars
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct alluvial diagram with vertical columns and curved flows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time points as columns, categories as stacked segments, flows between
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: time points left-to-right, consistent
+          colors, proportional band widths, transparency for flows'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, years 2012-2024 shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: No separate legend provided; labels are inline but a legend would
+          improve clarity
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "alluvial-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows party retention (large same-party flows), cross-party migration
+          (visible transitions), varying flow sizes, multiple time transitions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US voter data across election years is a comprehensible, real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Voter counts in realistic millions range (60-80M for major parties)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally linear flow, though code is longer due to alluvial complexity;
+          no classes but uses nested loops
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style, sns.set_context, sns.color_palette; however,
+          the core plotting uses matplotlib patches rather than seaborn plot functions
+          (alluvial is not a native seaborn plot type)
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/altair.yaml b/plots/andrews-curves/metadata/altair.yaml
index 556507a9c1..001c91af26 100644
--- a/plots/andrews-curves/metadata/altair.yaml
+++ b/plots/andrews-curves/metadata/altair.yaml
@@ -26,3 +26,180 @@ review:
   - Yellow color (#FFD43B) could be slightly less bright for better visibility on
     white background
   - Could add tooltips to enable interactive exploration of individual observations
+  image_description: 'The plot displays Andrews curves for the Iris dataset with 150
+    observations transformed into Fourier series curves. Three iris species are color-coded:
+    Setosa in yellow/gold (#FFD43B), Versicolor in blue (#306998), and Virginica in
+    olive green (#6B8E23). The x-axis shows "t (radians)" ranging from approximately
+    -π to π (-3.4 to 3.2), and the y-axis displays "Andrews Curve Value" ranging from
+    about -4 to 6. The title follows the required format: "Iris Classification · andrews-curves
+    · altair · pyplots.ai" with a descriptive subtitle. The legend is positioned in
+    the upper right corner. The curves demonstrate clear visual separation between
+    species, with Setosa (yellow) forming a distinct cluster near y=0, while Versicolor
+    and Virginica show overlapping wave patterns at higher amplitudes.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and legend are clearly readable. Font sizes
+          are appropriate for the canvas size. Minor: tick labels could be slightly
+          larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. All labels and legend are well-positioned.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Curves are visible with appropriate opacity (0.4). strokeWidth=2
+          works well for the data density. Very slight difficulty distinguishing individual
+          curves in dense regions.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and olive green provide reasonable distinction. Yellow
+          on white background could be slightly challenging in some contexts.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend is appropriately
+          positioned near the data.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"t (radians)" includes units, "Andrews Curve Value" is descriptive.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No visible grid (which is acceptable for this plot type), legend
+          is well-placed but could benefit from a background for better contrast.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves implementation using Fourier series transformation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=t parameter, Y=Andrews curve value, correctly implemented.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: normalized variables, transparency (alpha=0.4),
+          color by category, t range from -π to π.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all three species.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title includes spec-id "andrews-curves", library "altair", and "pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cluster separation between species, demonstrates the Fourier
+          transformation well. Could potentially show outlier detection more explicitly.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, appropriate example for Andrews curves
+          as mentioned in the spec.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized data produces sensible curve ranges. 150 observations
+          is within the recommended 30-150 range.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → transformation → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (altair, numpy, pandas, sklearn).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct for Altair).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding with detail for grouping curves,
+          proper mark_line with opacity. Could leverage more Altair-specific features
+          like tooltips or interactive selection.
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/bokeh.yaml b/plots/andrews-curves/metadata/bokeh.yaml
index 8f295cca1b..03df36868e 100644
--- a/plots/andrews-curves/metadata/bokeh.yaml
+++ b/plots/andrews-curves/metadata/bokeh.yaml
@@ -28,3 +28,174 @@ review:
   - Legend background could have slightly higher opacity for better readability
   - Does not leverage Bokeh interactive features like hover tooltips which would enhance
     the visualization
+  image_description: The plot displays Andrews curves for the Iris dataset with 150
+    observations across three species. Blue curves (#306998) represent one group,
+    yellow curves (#FFD43B) another, and teal curves (#2AA198) the third. The x-axis
+    shows "t (radians)" ranging from -π to π, and the y-axis shows "f(t)" ranging
+    approximately from -4 to 6. The title "andrews-curves · bokeh · pyplots.ai" appears
+    at the top left. A legend on the right identifies Setosa, Versicolor, and Virginica
+    species. All curves show smooth Fourier transformations with appropriate transparency
+    (alpha=0.4) to reveal density patterns. Dashed grid lines provide subtle reference
+    without being distracting.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable, font sizes appropriately
+          scaled for 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Curves are visible with good line_width=2 and alpha=0.4; slight density
+          in overlapping regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and teal are colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, plot fills canvas well, legend positioned on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "t (radians)" and "f(t)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style and alpha=0.3; legend well placed
+          but could have better background contrast
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves using Fourier series transformation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly transformed to Fourier coefficients
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: normalization, transparency, color by
+          category, t from -π to π'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all curves without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "andrews-curves · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cluster separation well; Setosa clearly distinct, Versicolor/Virginica
+          show expected overlap
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses canonical Iris dataset, perfect real-world botanical example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: StandardScaler applied correctly; values are realistic for normalized
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has one helper function (andrews_curve) which breaks strict KISS;
+          could be inline
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses sklearn.datasets.load_iris() which is deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, figure, Legend properly; could leverage hover
+          tools for interactivity
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/highcharts.yaml b/plots/andrews-curves/metadata/highcharts.yaml
index 26912d0293..9c53f5b25b 100644
--- a/plots/andrews-curves/metadata/highcharts.yaml
+++ b/plots/andrews-curves/metadata/highcharts.yaml
@@ -23,3 +23,16 @@ review:
   weaknesses:
   - Helper function andrews_curve() deviates from pure KISS structure (minor)
   - Y-axis label f(t) could be more descriptive for general audience
+  image_description: The plot displays Andrews curves for the Iris dataset with three
+    species (Setosa, Versicolor, Virginica) shown in different colors. The x-axis
+    represents t (radians) ranging from -3 to 3, and the y-axis represents f(t) ranging
+    from approximately -5 to 6. Blue curves represent Setosa, yellow/gold curves represent
+    Versicolor, and purple curves represent Virginica. The curves show distinctive
+    wave patterns that help distinguish species - Setosa curves (blue) tend to have
+    lower amplitude and cluster together, while Versicolor (yellow) and Virginica
+    (purple) show more overlap but with distinguishable patterns. A vertical legend
+    is positioned in the upper right corner with a white background. The title reads
+    "Iris Species · andrews-curves · highcharts · pyplots.ai". Grid lines are subtle
+    and visible. Transparency is applied to the curves allowing overlapping patterns
+    to be visible.
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/letsplot.yaml b/plots/andrews-curves/metadata/letsplot.yaml
index bcc6421e27..0d1c8cb80e 100644
--- a/plots/andrews-curves/metadata/letsplot.yaml
+++ b/plots/andrews-curves/metadata/letsplot.yaml
@@ -27,3 +27,174 @@ review:
     them, but subtle grid would help)
   - Missing explicit random seed (though data is deterministic from sklearn, good
     practice to include)
+  image_description: The plot displays Andrews curves for the Iris dataset, showing
+    150 Fourier series curves colored by species. Blue curves represent setosa (tightly
+    clustered in lower range), yellow curves represent versicolor (middle range),
+    and red/orange curves represent virginica (spreading into higher values). The
+    X-axis shows parameter t from -π to π with proper Greek letter labels at key intervals
+    (-π, -π/2, 0, π/2, π). The Y-axis displays Fourier Function Value ranging approximately
+    from -5 to 5.5. The title correctly follows the format "andrews-curves · letsplot
+    · pyplots.ai". A legend on the right identifies the three species. Transparency
+    (alpha=0.4) effectively reveals density patterns where curves overlap.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend text are all clearly
+          readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are visible with good transparency; could be slightly thicker
+          for better visibility at high density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are distinguishable but yellow-red may be challenging
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Parameter t (radians)" and "Fourier
+          Function Value"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is not visible (missing grid lines reduce visual reference)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves visualization with Fourier transformation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to Fourier coefficients, category to color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization, transparency, color by category, t range from -π to
+          π all implemented
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "andrews-curves · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows all 150 observations, clear cluster separation visible between
+          species
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses classic Iris dataset, a neutral scientific dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized data with appropriate Fourier transform values
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → transform → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: Missing np.random.seed(42); uses sklearn dataset but transformation
+          should still be deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, scale_x_continuous with custom labels, theme
+          customization, but no interactive-specific features
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/matplotlib.yaml b/plots/andrews-curves/metadata/matplotlib.yaml
index 02e775ea9f..70712f9d14 100644
--- a/plots/andrews-curves/metadata/matplotlib.yaml
+++ b/plots/andrews-curves/metadata/matplotlib.yaml
@@ -26,3 +26,170 @@ review:
     library guidelines
   - Legend sample lines use alpha=0.8 while actual curves use alpha=0.4, creating
     visual inconsistency
+  image_description: 'The plot shows Andrews curves visualization using the Iris dataset
+    with 150 observations (50 per species). Three distinct colors are used: blue (#306998)
+    for Setosa, yellow (#FFD43B) for Versicolor, and coral/pink (#E06C75) for Virginica.
+    The x-axis displays t values from -π to π in radians with clear labels at -π,
+    -π/2, 0, π/2, π. The y-axis shows f(t) values ranging approximately from -4 to
+    5. The title "andrews-curves · matplotlib · pyplots.ai" is displayed at the top.
+    A legend in the upper right identifies the three species. The curves clearly show
+    Setosa (blue) clustering distinctly from the other two species, demonstrating
+    good separation, while Versicolor (yellow) and Virginica (coral) overlap significantly
+    but have some separation in certain regions. A subtle dashed grid (alpha=0.3)
+    aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines at linewidth=1.5 with alpha=0.4 work well for 150 curves, though
+          slightly thin per library guidelines (recommends linewidth=2-4)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/coral palette is mostly colorblind-safe, though yellow
+          can be hard to distinguish from coral for some types of colorblindness
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"t (radians)" and "f(t)" are descriptive but f(t) could be more
+          explanatory (e.g., "Andrews Function Value")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3, but legend alpha=0.8 sample lines differ
+          from plotted alpha=0.4 curves
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly transformed using Fourier expansion
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized data, appropriate alpha for overlapping curves, colored
+          by category, t from -π to π
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All curves visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "andrews-curves · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows cluster separation (Setosa distinct) and overlap patterns (Versicolor/Virginica),
+          demonstrating Andrews curves' utility for multivariate comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses classic Iris dataset, a standard benchmark in data visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: StandardScaler normalization produces appropriate f(t) range
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No np.random.seed(42), though sklearn's Iris is deterministic so
+          output is reproducible
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/plotly.yaml b/plots/andrews-curves/metadata/plotly.yaml
index c71d20c682..8cfdff2261 100644
--- a/plots/andrews-curves/metadata/plotly.yaml
+++ b/plots/andrews-curves/metadata/plotly.yaml
@@ -26,3 +26,177 @@ review:
     rule; should inline the transformation logic
   - Yellow color for Versicolor may be less distinguishable for colorblind users;
     consider using a more accessible palette
+  image_description: 'The plot displays Andrews curves for the Iris dataset with 150
+    samples transformed into Fourier series curves. Three species are shown: Setosa
+    (blue), Versicolor (yellow/gold), and Virginica (red/coral). The x-axis shows
+    parameter t in radians from -π to π with proper π notation tick labels. The y-axis
+    shows f(t) in normalized units ranging from approximately -4 to 4. The title correctly
+    reads "Iris Dataset · andrews-curves · plotly · pyplots.ai". The legend is well-positioned
+    in the upper right corner with a subtle border. Curves show clear visual separation
+    between species - Setosa curves (blue) cluster distinctly from Versicolor and
+    Virginica, which show some natural overlap. Transparency (alpha=0.4) effectively
+    reveals density patterns where curves converge and diverge.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend positioned away from data
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width=2 with 0.4 opacity perfectly suited for 150 overlapping
+          curves
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/red distinguishable, though yellow could be harder for
+          some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, minor extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Parameter t (radians)", "f(t) (normalized
+          units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha 0.3 is appropriate, but the legend shows "Setosa", "Versicolor",
+          "Virginica" labels which appear small relative to the overall plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves visualization with Fourier transformation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped as Fourier coefficients, t parameter correctly
+          on x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization applied, transparency used, color by category implemented
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full t range [-π, π] shown, y-axis auto-scaled to data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species names correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Iris Dataset · andrews-curves · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows all 150 samples, demonstrates cluster separation and overlap
+          patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic Iris dataset, perfect for demonstrating multivariate visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Normalized values are correct; the sklearn data is deterministic,
+          so no seed is needed
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Contains a helper function `andrews_curve()` which violates the no-functions
+          rule
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic Iris dataset from sklearn
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, plotly, sklearn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Plotly features
+        score: 5
+        max: 5
+        passed: true
+        comment: Custom hover templates, interactive HTML export, proper legend grouping,
+          go.Scatter with graph_objects
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/plotnine.yaml b/plots/andrews-curves/metadata/plotnine.yaml
index 8b8cda8c4e..73c3eb6364 100644
--- a/plots/andrews-curves/metadata/plotnine.yaml
+++ b/plots/andrews-curves/metadata/plotnine.yaml
@@ -23,3 +23,182 @@ review:
   weaknesses:
   - Color palette could be more distinct - the green and blue can appear similar in
     dense curve regions; consider using a more divergent palette
+  image_description: The plot displays Andrews curves for the iris dataset with three
+    species (setosa, versicolor, virginica). The visualization shows smooth sinusoidal
+    curves spanning from approximately -π to π on the x-axis (labeled "t (radians)")
+    and Andrews Curve Values ranging from about -5 to 5 on the y-axis. Setosa curves
+    (blue) cluster distinctly in the upper region around t=0, while versicolor (yellow/orange)
+    and virginica (darker blue/green) curves show more overlap in the middle and lower
+    regions. The plot uses a minimal theme with a light gray background, has a legend
+    positioned on the right side showing the three species, and the title follows
+    the correct format "andrews-curves · plotnine · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis labels at 20pt, tick labels at 16pt, legend
+          text at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Curves are visible with appropriate alpha=0.4 and size=0.8 for 150
+          observations; slight overlap in versicolor/virginica region but this is
+          expected behavior showing cluster overlap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#1f77b4), orange (#ff7f0e), green (#2ca02c) are reasonable
+          but the green appears quite similar to the blue in some regions
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"t (radians)" has units, but "Andrews Curve Value" is generic (no
+          unit, which is acceptable as it''s dimensionless)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well positioned on right; grid is present but very subtle
+          (could be slightly more visible)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves visualization with Fourier transformation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: t parameter on x-axis, curve values on y-axis, color by species
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: normalization, transparency, color by
+          category, t from -π to π'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows three species names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "andrews-curves · plotnine · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: cluster separation (setosa distinct), cluster
+          overlap (versicolor/virginica), wave patterns characteristic of Andrews
+          curves'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses real iris dataset, a classic example for multivariate visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: StandardScaler normalization produces appropriate scale, 150 observations
+          with 4 dimensions as recommended
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data loading → transformation
+          → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: '**No random seed set**. Best practice would include a seed, but
+          the iris dataset from sklearn''s load_iris() is deterministic and no randomization
+          is used, so the output is reproducible and this is acceptable.
+          *Revised: 3/3 - data is fully deterministic*'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar correctly with geom_line, aes mapping, scale_color_manual,
+          and theme_minimal. Could potentially leverage faceting or other plotnine
+          features, but implementation is solid.
+  verdict: APPROVED
diff --git a/plots/andrews-curves/metadata/seaborn.yaml b/plots/andrews-curves/metadata/seaborn.yaml
index 162c6dcb34..c125da8fc5 100644
--- a/plots/andrews-curves/metadata/seaborn.yaml
+++ b/plots/andrews-curves/metadata/seaborn.yaml
@@ -24,3 +24,179 @@ review:
   - Axis labels use mathematical notation only (t, f(t)) without descriptive context
     for newcomers
   - Legend could be positioned to avoid any potential overlap with curve endpoints
+  image_description: The plot displays Andrews curves for the Iris dataset with three
+    species (setosa, versicolor, virginica) represented by different colors. The x-axis
+    shows t values from -π to π with appropriate π notation labels. The y-axis shows
+    f(t) values ranging from approximately -4 to 5. Blue curves represent setosa,
+    yellow/gold curves represent versicolor, and red/coral curves represent virginica.
+    The curves show clear separation between species groups - setosa curves tend toward
+    negative f(t) values in the middle region while virginica curves trend higher.
+    The legend is positioned in the upper right corner with "Species" as the title.
+    The plot has a white grid background with subtle gridlines, and the title follows
+    the required format. All 150 observations from the Iris dataset are visualized
+    as individual curves with appropriate transparency to show density patterns.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines visible with good alpha=0.4 transparency for 150 curves, linewidth=1.5
+          is appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998), Yellow (#FFD43B), Red (#E74C3C) palette is colorblind-safe
+          (distinguishable by luminance)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good 16:9 aspect ratio, plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"t" and "f(t)" are mathematical notation appropriate for Andrews
+          curves but lack descriptive context'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend well placed but overlaps slightly
+          with some curve endpoints
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Andrews curves Fourier transformation visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly transformed using Fourier series coefficients
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization, transparency, color by category, t from -π to π all
+          implemented
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "andrews-curves · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cluster separation and overlap patterns well, could show outliers
+          more prominently
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is classic multivariate data, perfect for demonstrating
+          Andrews curves
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Normalized values are appropriate; scale is sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → transform → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses fixed dataset (iris) but no random seed explicitly set (not
+          strictly needed here since data is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of sns.lineplot with units parameter for individual curves
+          and sns.load_dataset for data loading, but Andrews curves transformation
+          is manual (not a seaborn built-in)
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/altair.yaml b/plots/arc-basic/metadata/altair.yaml
index 6370811d79..f9bd659ccc 100644
--- a/plots/arc-basic/metadata/altair.yaml
+++ b/plots/arc-basic/metadata/altair.yaml
@@ -26,3 +26,170 @@ review:
   - Layout has excessive whitespace below the baseline; nodes are positioned at y=10
     in a 900px height canvas
   - Missing legend for edge weight interpretation (strokeWidth encoding has legend=None)
+  image_description: The plot displays an arc diagram showing character interactions.
+    Ten nodes (Alice, Bob, Carol, David, Eve, Frank, Grace, Henry, Iris, Jack) are
+    arranged horizontally along the bottom of the chart, represented as yellow circles
+    with blue outlines. The character names appear below each node in bold blue text.
+    Curved blue arcs connect pairs of nodes above the horizontal line, with arc height
+    proportional to the distance between connected nodes. The longest arcs span from
+    Alice to Jack and Alice to Henry, reaching nearly to the top of the plot. Shorter
+    arcs connect adjacent or nearby characters. The arcs use semi-transparent blue
+    (#306998) with varying thicknesses based on edge weights. The title "Character
+    Interactions · arc-basic · altair · pyplots.ai" appears at the top center.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (28pt), node labels are 18pt bold, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes are appropriately sized (600), arcs visible with opacity 0.6
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall, but nodes positioned low with much empty space below
+          baseline
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axes present (appropriate for this chart type, but no units shown)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean design, no unnecessary grid, strokeWidth encoding is self-explanatory
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct arc diagram type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly arranged horizontally, arcs connect pairs
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Arc heights proportional to distance, semi-transparency, stroke width
+          for weights
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and connections visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend for edge weights (strokeWidth legend is disabled)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Character Interactions · arc-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows short-range and long-range connections, varying weights (1-3)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Character interactions in a story is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 10 nodes with 15 edges is ideal for readability
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses declarative layering, encoding system, but arc diagrams are
+          not native to Altair so required manual path generation
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/bokeh.yaml b/plots/arc-basic/metadata/bokeh.yaml
index 178909e254..00d227a403 100644
--- a/plots/arc-basic/metadata/bokeh.yaml
+++ b/plots/arc-basic/metadata/bokeh.yaml
@@ -26,3 +26,177 @@ review:
     character names are already labeled below nodes
   - Could add HoverTool to show edge details on hover, leveraging Bokeh interactive
     capabilities
+  image_description: 'The plot displays a basic arc diagram with 8 character nodes
+    (Alice, Bob, Carol, David, Eve, Frank, Grace, Henry) arranged horizontally along
+    a baseline. Curved arcs connect pairs of characters above the baseline, representing
+    interactions. The arcs use two colors: Python Yellow (#FFD43B) for long-range
+    connections (distance > 5) and Python Blue (#306998) for short-range connections
+    (distance ≤ 5). Arc heights are proportional to the distance between connected
+    nodes, with taller arcs spanning farther distances. Line thickness varies based
+    on connection weight. The title "arc-basic · bokeh · pyplots.ai" appears in the
+    top-left. A legend in the top-right explains the color coding. The x-axis shows
+    "Characters" with numeric tick marks (0, 2, 4, 6, 8, 10), and character name labels
+    appear below each node.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 22pt, tick labels 18pt, node labels 20pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arcs clearly visible with appropriate thickness variation; nodes
+          visible but could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight imbalance with large empty space above arcs
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Characters" label present but no units (expected for categorical)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed; x-grid subtle but unnecessary for this visualization
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct arc diagram with nodes on horizontal line and curved arcs
+          above
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned, edges drawn as arcs with height proportional
+          to distance
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: ordered nodes, curved arcs, height proportional
+          to distance, semi-transparent arcs, readable labels, color coding by type'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and connections visible within plot bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes long-range vs short-range connections
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "arc-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety of connection distances (short, medium, long-range),
+          varying weights, but could show more edge cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Character interactions in a story chapter is a perfect, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 8 nodes with 12 edges is appropriate; weights 1-3 reasonable
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but Legend/LegendItem imports could be consolidated
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Label, custom Legend with LegendItem, bezier
+          curve calculation; could leverage more Bokeh-specific features like HoverTool
+          for interactivity
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/letsplot.yaml b/plots/arc-basic/metadata/letsplot.yaml
index 04367b21f4..b51727545a 100644
--- a/plots/arc-basic/metadata/letsplot.yaml
+++ b/plots/arc-basic/metadata/letsplot.yaml
@@ -24,3 +24,189 @@ review:
   - Node labels could be slightly larger for optimal readability at high resolution
   - Does not leverage lets-plot specific interactive features (could use tooltips
     on hover)
+  image_description: The plot displays a well-executed arc diagram visualizing character
+    interactions. Ten nodes (Alice, Bob, Carol, David, Eve, Frank, Grace, Henry, Iris,
+    Jack) are arranged horizontally along a baseline, represented as yellow circular
+    markers. Blue semi-transparent arcs curve above the baseline connecting related
+    characters. Arc heights are proportional to the distance between connected nodes
+    - short-range connections (like Alice-Bob) have low arcs while long-range connections
+    (like Alice-Jack) have tall arcs spanning nearly the full width. Arc thickness
+    varies based on connection weight, with stronger connections appearing thicker.
+    The title "Character Interactions · arc-basic · letsplot · pyplots.ai" appears
+    at the top left in bold. Node labels are displayed below each node in bold blue
+    text. The design uses a clean white background with no axis lines or grid.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable. Font sizes are appropriate
+          for the output resolution. Labels could be slightly larger for optimal readability.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Node labels are well-spaced and arcs
+          do not obscure labels.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are clearly visible with good sizing. Arcs use appropriate
+          alpha (0.6) for overlapping connections. Some thinner arcs could be slightly
+          more visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast. Colorblind-safe
+          combination.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space. Arcs fill the upper portion well,
+          nodes centered horizontally.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for arc diagrams (axes are hidden by design), but no descriptive
+          subtitle or legend explaining the visualization.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Appropriately no grid for this diagram type. Legend hidden as connection
+          weights are shown via line thickness.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct arc diagram implementation with nodes on horizontal axis
+          and curved arcs above.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned, edges correctly drawn between specified
+          pairs.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: arc height proportional to distance,
+          semi-transparent overlapping arcs, readable node labels, weight-based line
+          thickness.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and connections visible within the plot area.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit legend for weights, but thickness variation is intuitive.
+          Could benefit from a brief explanation.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "Character Interactions · arc-basic
+          · letsplot · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both short-range and long-range connections, varying weights
+          (1-3), multiple connections per node. Could include isolated node to show
+          full feature range.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Character interactions in a story chapter is a perfect real-world
+          application matching the spec's narrative flow example.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 10 nodes with 15 edges is appropriate scale for readability as per
+          spec (10-50 nodes typical).
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → arc generation → plot →
+          save. No unnecessary functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used. Explicit imports from lets_plot.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar correctly with geom_path, geom_point, geom_text,
+          and scale_size_identity. However, does not leverage any lets-plot specific
+          interactive features or advanced capabilities beyond basic ggplot2 syntax.
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/matplotlib.yaml b/plots/arc-basic/metadata/matplotlib.yaml
index ee75115817..93c1c8c72b 100644
--- a/plots/arc-basic/metadata/matplotlib.yaml
+++ b/plots/arc-basic/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
   - Could use color coding for different edge types or weights as mentioned in spec
     notes
   - Library features score could improve by using colormap for arcs based on weight
+  image_description: The plot displays a basic arc diagram showing character interactions.
+    Ten nodes (Alice, Bob, Carol, David, Eve, Frank, Grace, Henry, Iris, Jack) are
+    arranged horizontally along a baseline, represented as yellow circles with blue
+    borders. Curved blue arcs connect various character pairs above the baseline,
+    with arc height proportional to the distance between connected characters. For
+    example, the Alice-Jack connection spans the full width with the tallest arc,
+    while adjacent connections like Alice-Bob have shorter arcs. Arc thickness varies
+    based on connection weight, and arcs use semi-transparency (alpha ~0.55) to handle
+    overlapping connections. The title "Character Interactions · arc-basic · matplotlib
+    · pyplots.ai" appears at the top. Node labels are displayed below each node in
+    bold blue text.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, node labels at 16pt bold, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; arcs use transparency for overlapping connections
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes well-sized (s=500), arcs visible with appropriate thickness
+          based on weights
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme (#306998) with yellow accent (#FFD43B),
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though some whitespace at top could be reduced
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for arc diagram (axes turned off appropriately)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present, though one showing weight scale could add value
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct arc diagram with nodes along horizontal line and curved arcs
+          above
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes positioned sequentially, edges connect proper node pairs
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Arc height proportional to distance, semi-transparent arcs for overlaps,
+          readable labels, weight-based thickness
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and connections visible within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Character Interactions · arc-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows short-range and long-range connections, varying weights, but
+          could demonstrate bidirectional connections or different edge types
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Character interactions in a story chapter are a perfect real-world
+          scenario for arc diagrams
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 10 nodes with 15 edges is appropriate; weights 1-3 reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is deterministic anyway
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib.patches, pyplot, numpy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern matplotlib API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib.patches.Arc which is appropriate, but doesn't leverage
+          other matplotlib features like colormaps for edge colors or annotations
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/plotly.yaml b/plots/arc-basic/metadata/plotly.yaml
index a2bc312759..58bbb6c15e 100644
--- a/plots/arc-basic/metadata/plotly.yaml
+++ b/plots/arc-basic/metadata/plotly.yaml
@@ -25,3 +25,167 @@ review:
   - Could add hover information showing connection details (source, target, weight)
     for better interactivity
   - Missing edge color coding by type or weight as suggested in spec notes
+  image_description: The plot displays a basic arc diagram with 10 nodes (Alice, Bob,
+    Carol, David, Eve, Frank, Grace, Henry, Iris, Jack) arranged horizontally along
+    a gray baseline. Each node is represented by a yellow circular marker with a blue
+    border. The nodes are connected by semi-transparent blue parabolic arcs above
+    the baseline. Arc heights vary based on the distance between connected nodes -
+    short-range connections (e.g., Alice-Bob) have lower arcs while long-range connections
+    (e.g., Alice-Iris) have higher arcs. The arcs have varying thicknesses based on
+    connection weights. The title "arc-basic · plotly · pyplots.ai" appears centered
+    at the top in dark text on a clean white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt and node labels at 18pt are clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes (size=24) and arcs are well-sized, semi-transparency (0.6)
+          handles overlapping arcs nicely
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but significant whitespace below the baseline
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (acceptable for arc diagrams which hide axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid shown (appropriate), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct arc diagram with nodes on horizontal line and curved arcs
+          above
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned, edges correctly connect source/target
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: arc height proportional to distance,
+          varying weights affect thickness, semi-transparent arcs'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and connections visible within the plot area
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, node labels accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "arc-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows short-range, medium-range, and long-range arcs with varying
+          weights, but could show more variation in edge types
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Character interactions in a story narrative are a plausible and comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 10 nodes is within the recommended 10-50 range, weights 1-3 provide
+          clear differentiation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random), but no explicit seed set for
+          any random operations
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/plotnine.yaml b/plots/arc-basic/metadata/plotnine.yaml
index 915ca80887..f7be3e7551 100644
--- a/plots/arc-basic/metadata/plotnine.yaml
+++ b/plots/arc-basic/metadata/plotnine.yaml
@@ -25,3 +25,175 @@ review:
   weaknesses:
   - The sys.path manipulation at the top is a workaround that could be cleaner
   - Some thin arcs (weight=1) are harder to distinguish from each other
+  image_description: The plot displays a basic arc diagram with 10 nodes (Alice, Bob,
+    Carol, David, Eve, Frank, Grace, Henry, Iris, Jack) arranged horizontally along
+    a baseline. Yellow circular nodes mark each character's position. Blue semi-transparent
+    arcs curve above the baseline connecting various character pairs. Arc heights
+    vary proportionally to the distance between connected nodes - the Alice-Jack arc
+    spans the full width with the highest peak, while adjacent connections like Bob-Carol
+    have small, tight arcs. Arc thickness varies based on connection weight, with
+    some arcs thicker than others. The title "Character Interactions · arc-basic ·
+    plotnine · pyplots.ai" appears centered at the top. The background is clean white
+    with no axis markings or grid.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear at 24pt, node labels are readable in bold
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, nodes and labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Arcs visible with alpha=0.6, nodes clearly marked; minor: some thin
+          arcs harder to see'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) combination is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, arc heights fill space well without being cut off
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for arc diagrams, axes intentionally hidden (no deduction appropriate)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean design, no grid needed for this visualization type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct arc diagram with nodes on horizontal line and curved arcs
+          above
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned sequentially, arcs connect correct pairs
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has arcs above baseline, height proportional to distance, semi-transparent
+          arcs, readable labels, weight-based thickness
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and arcs visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, data is self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Character Interactions · arc-basic · plotnine
+          · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows short-range and long-range connections, varying weights; could
+          show more edge cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: '"Character interactions in a story chapter" is a realistic, comprehensible
+          scenario'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 10 nodes is within the 10-50 recommended range; weights 1-3 are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: sys import used for path manipulation, slightly unusual
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_path and geom_point effectively; however,
+          arc diagrams aren't a native plotnine feature so this is a creative workaround
+          rather than leveraging library strengths
+  verdict: APPROVED
diff --git a/plots/arc-basic/metadata/pygal.yaml b/plots/arc-basic/metadata/pygal.yaml
index 2e49a33a16..19cb1e051e 100644
--- a/plots/arc-basic/metadata/pygal.yaml
+++ b/plots/arc-basic/metadata/pygal.yaml
@@ -15,3 +15,15 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: The plot displays an arc diagram with 10 character names (Alice,
+    Bob, Carol, David, Eve, Frank, Grace, Henry, Iris, Jack) arranged horizontally
+    along the bottom on a white background. Each character is represented by a yellow/gold
+    circular node (#FFD43B). The connections between characters are shown as blue
+    curved arcs (#306998) above the horizontal baseline. The arcs vary in height in proportion
+    to the distance between connected nodes - longer-range connections (like Alice-Jack)
+    have higher arcs, while shorter-range connections (like Bob-Carol) have lower
+    arcs. Arc thickness varies based on connection weight, with stronger connections
+    appearing thicker. The title "Character Interactions · arc-basic · pygal · pyplots.ai"
+    is displayed at the top in blue. The arcs have semi-transparency allowing overlapping
+    arcs to be distinguishable.
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/altair.yaml b/plots/area-basic/metadata/altair.yaml
index 816d07f10d..ac6ef3dfec 100644
--- a/plots/area-basic/metadata/altair.yaml
+++ b/plots/area-basic/metadata/altair.yaml
@@ -23,3 +23,177 @@ review:
   - Y-axis label could include units (e.g., Daily Visitors (count))
   - Could add tooltip showing exact values for interactive version carried to static
     aesthetics
+  image_description: The plot displays a basic area chart showing daily website visitors
+    over January 2024 (30 days). The area is filled with a semi-transparent blue color
+    (#306998) with a darker blue line along the top edge. The title "area-basic ·
+    altair · pyplots.ai" is centered at the top in black text. The X-axis shows "Date"
+    with date labels (2024, Wed 03, Fri 05, etc.), and the Y-axis shows "Daily Visitors"
+    ranging from 0 to approximately 8,500. Dashed gridlines are visible throughout.
+    The data shows a clear weekly pattern with dips on weekends (Saturday/Sunday)
+    and higher values on weekdays, plus an overall upward trend from ~6,000 to ~7,500
+    visitors.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title at 28pt, axis labels at 22pt, tick labels
+          at 18pt. Slightly conservative sizing but fully legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fill with 0.4 opacity and 3px line width is well-suited for
+          this data density (30 points)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), no color comparison needed, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace in lower portion due to
+          Y-axis starting at 0
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Date", "Daily Visitors") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and 0.3 opacity; no legend needed
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart with filled area below the line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=datetime (date), Y=numeric (visitors) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (0.4), gridlines, clear axis labels, line visible
+          on top
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with 10% headroom, X-axis shows full date range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "area-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend AND cyclical pattern (weekday/weekend); missing extreme
+          peaks/valleys that would show full dynamic range
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a perfect real-world scenario for area charts;
+          weekday/weekend pattern is authentic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 3,500-8,200 visitors/day are realistic; could show more variation
+          in scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (altair, numpy, pandas) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API throughout
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves correctly, but the code comment on the output resolution
+          is misleading (1600×900 scaled 3× to 4800×2700 is not explained correctly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative grammar with `.encode()`, `.properties()`,
+          `.configure_axis()`, and `.interactive()` for HTML output. Could leverage
+          more Altair-specific features like tooltips in the static version.
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/bokeh.yaml b/plots/area-basic/metadata/bokeh.yaml
index 6bb26e1047..1ca3207525 100644
--- a/plots/area-basic/metadata/bokeh.yaml
+++ b/plots/area-basic/metadata/bokeh.yaml
@@ -23,3 +23,176 @@ review:
   - Missing HoverTool for interactivity - Bokeh key strength is interactive exploration
   - Axis labels lack units (could be Daily Visitors count or similar)
   - Large empty space below data since values range 4000-7000 but y starts at 0
+  image_description: The plot displays a basic area chart showing daily website visitors
+    over January 2024. The filled area uses a steel blue color (#306998) with 40%
+    transparency, creating visual weight that emphasizes the magnitude of visitor
+    counts. A solid line traces the top edge of the area for clear definition. The
+    x-axis shows dates from Jan 01, 2024 to Feb 01, with clear date labels. The y-axis
+    displays "Daily Visitors" ranging from 0 to approximately 7000. The title "area-basic
+    · bokeh · pyplots.ai" appears in the top left. The data exhibits a realistic weekly
+    cyclical pattern (weekend dips) with an overall upward trend across the month.
+    Subtle dashed grid lines aid value estimation. The y-axis correctly starts at
+    0, which is essential for area charts to avoid misleading representations.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; date labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fill and line are perfectly visible; line width of 5 appropriate
+          for canvas size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace at bottom due to y starting
+          at 0 but data ranging 4000-7000
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Daily Visitors" and "Date" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3) with nice dashed styling; no legend needed
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart with filled region below line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Datetime on x-axis, numeric values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (0.4), gridlines present, clear axis labels,
+          line on top
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no misleading elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "area-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, cyclical pattern, and magnitude well; could show more
+          dramatic variations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic with weekly patterns and growth trend is a real,
+          comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 4000-7000 visitors/day plausible; base of 5000 perhaps high
+          for "basic" example
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses varea() and ColumnDataSource correctly, but doesn't leverage
+          Bokeh's interactive features like HoverTool which would enhance the visualization
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/highcharts.yaml b/plots/area-basic/metadata/highcharts.yaml
index 13b9bf7e4b..2718b69d94 100644
--- a/plots/area-basic/metadata/highcharts.yaml
+++ b/plots/area-basic/metadata/highcharts.yaml
@@ -22,3 +22,173 @@ review:
   - Y-axis starts at 0 creating large empty space below the data (could use min property)
   - Axis labels lack units (could be "Daily Visitors (count)" or "Day of Month (date)")
   - Could leverage more Highcharts-specific features like hover tooltips or animation
+  image_description: The plot displays a basic area chart showing website traffic
+    over 30 days. The chart has a blue color scheme (#306998) with a gradient fill
+    that transitions from semi-opaque at the top (~0.5 alpha) to nearly transparent
+    at the bottom (~0.1 alpha). The title "area-basic · highcharts · pyplots.ai" is
+    prominently displayed at the top in bold. The X-axis is labeled "Day of Month"
+    with values from 1-30, and the Y-axis shows "Daily Visitors" ranging from 0 to
+    4000. Small blue circular markers are placed at each data point along the line.
+    The data shows a clear upward trend with weekly cyclical patterns (peaks roughly
+    every 7 days). Grid lines are subtle and visible on both axes. The overall layout
+    is clean with a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (72px), axis labels are clear (48px), tick
+          labels readable (36px)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers visible, line width good, could use slightly larger markers
+          for 30 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color palette, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though Y-axis starts at 0 creating empty space
+          below data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (visitors per day could be clearer)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle and appropriate, legend disabled but not needed for single
+          series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=days, Y=visitors correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled area, gridlines, axis labels all present per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "{spec-id} · {library} · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend and periodicity well, could show more variation in magnitude
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website visitors scenario is realistic and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (2000-3700 visitors/day) are reasonable, though range could
+          be tighter
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses container.screenshot() instead of driver.save_screenshot()
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses gradient fill color, proper Highcharts options structure, but
+          doesn't leverage more advanced features like tooltips or hover effects
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/letsplot.yaml b/plots/area-basic/metadata/letsplot.yaml
index f41d1835bb..4aab01d0ba 100644
--- a/plots/area-basic/metadata/letsplot.yaml
+++ b/plots/area-basic/metadata/letsplot.yaml
@@ -24,3 +24,177 @@ review:
   - Uses numeric day_num instead of actual dates; could use scale_x_datetime for proper
     date axis formatting
   - Grid styling could be more subtle (add alpha to panel_grid)
+  image_description: The plot shows a basic area chart displaying daily website visitors
+    over 30 days. The filled area uses a semi-transparent blue color (#306998 with
+    alpha 0.4), with a darker blue line (same color, size=2) tracing the top boundary
+    of the area. The chart has a light gray dashed grid in the background. The title
+    "area-basic · letsplot · pyplots.ai" appears at the top. The x-axis is labeled
+    "Day of Month" (ranging from 0 to 30), and the y-axis is labeled "Daily Visitors"
+    (ranging from 0 to 8,000). The data shows a realistic pattern with an upward trend
+    over the month, weekly cyclical variations (approximately 7-day periods visible
+    as peaks and valleys), and some random noise. Values start around 5,000 visitors
+    and end near 7,500.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes (title=24, axis_title=20, axis_text=16)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fill and line are well-sized and clearly visible; alpha=0.4
+          is appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions overall, though the y-axis starts at 0 which creates
+          a large empty area below the data (values range ~4,300-7,600)
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Day of Month", "Daily Visitors") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is visible with dashed style but could be more subtle (alpha
+          not apparent in grid styling)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart type with filled area below the line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (day number) and Y (visitors) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has semi-transparent fill (alpha 0.4), gridlines, clear axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series area chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "area-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, cyclical pattern, and variation well; could show more
+          dramatic peaks/valleys
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website visitors is the exact example from spec; daily pattern with
+          weekly cycles is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Visitor counts (2,000-8,000 range) are realistic for a medium-sized
+          website
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implementation uses basic ggplot grammar but doesn't leverage lets-plot
+          specific features like interactive tooltips, scale_x_datetime for proper
+          date handling, or other distinctive capabilities
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/matplotlib.yaml b/plots/area-basic/metadata/matplotlib.yaml
index a9beb2bc47..56829e6289 100644
--- a/plots/area-basic/metadata/matplotlib.yaml
+++ b/plots/area-basic/metadata/matplotlib.yaml
@@ -23,3 +23,173 @@ review:
   - Axis labels missing units (e.g., Daily Visitors could include count)
   - Did not implement optional gradient fill from bottom to line for visual appeal
   - Basic matplotlib usage without leveraging distinctive features
+  image_description: 'The plot displays a basic area chart with a blue filled area
+    (#306998 color) showing daily website visitors over a 30-day period. The x-axis
+    is labeled "Day of Month" ranging from 1 to 30, and the y-axis shows "Daily Visitors"
+    ranging from 0 to approximately 8000. The title follows the correct format: "area-basic
+    · matplotlib · pyplots.ai". The area is filled with semi-transparent blue (alpha
+    ~0.4) with a solid blue line on top (linewidth 3). A subtle dashed grid (alpha
+    0.3) helps with value estimation. The data shows an upward trend with natural
+    variation - starting around 5000 visitors and trending upward to around 6500-7000
+    by day 30, with visible day-to-day fluctuations adding realism.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is appropriate, area fill clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, tight_layout applied, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but missing units (e.g., "visitors/day")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha 0.3 with dashed style, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart with fill_between
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=days, Y=visitors correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (alpha 0.4), gridlines present, clear axis
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, X-axis shows full 1-30 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "area-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows upward trend with daily variation, demonstrates area chart
+          purpose well. Minor: could show more dramatic changes to emphasize "volume"
+          aspect'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily website visitors over a month is a perfect real-world scenario
+          matching spec examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5000-7000 visitors reasonable; baseline at 0 slightly exaggerates
+          visual weight but acceptable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses fill_between correctly but no gradient fill (mentioned in spec
+          notes as optional enhancement), no use of matplotlib-specific features like
+          color gradients or annotations
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/plotly.yaml b/plots/area-basic/metadata/plotly.yaml
index 6ad8c9466c..141c99499c 100644
--- a/plots/area-basic/metadata/plotly.yaml
+++ b/plots/area-basic/metadata/plotly.yaml
@@ -26,3 +26,176 @@ review:
     just "Date")
   - Missing hover template customization which is a key Plotly strength
   - Could benefit from range slider or zoom capabilities to showcase Plotly interactivity
+  image_description: The plot displays a basic area chart showing daily website visitors
+    over January 2024 (30 days). The x-axis shows dates from Jan 2, 2024 to Jan 29,
+    2024, and the y-axis shows "Visitors (daily count)" ranging from 0 to approximately
+    7000. The area below the line is filled with a semi-transparent blue color (rgba
+    blue, ~0.4 alpha), while the line itself is a darker blue (#306998). The data
+    exhibits a clear weekly cyclical pattern (peaks and troughs repeating roughly
+    every 7 days) along with an overall upward trend from ~5000 to ~7000 visitors.
+    The title "Daily Website Visitors · area-basic · plotly · pyplots.ai" is centered
+    at the top. The background uses the plotly_white template with subtle light gray
+    gridlines. The layout is clean with good margins and no overlapping elements.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis titles at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, dates are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is appropriate, area fill clearly visible with good
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind concerns
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, well-proportioned plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "(daily count)" but X-axis just says "Date" without
+          format specification
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha 0.1, but no legend shown (showlegend=False)
+          - for a single series this is acceptable but loses points
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart using fill="tozeroy"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Datetime on x-axis, numeric visitors on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (0.4), gridlines present, clear axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, name is descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Daily Website Visitors · area-basic · plotly
+          · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, weekly pattern, and variance - good demonstration of
+          area chart strengths. Could show more dramatic volume changes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a perfect real-world scenario for area charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 4000-7000 are plausible for a mid-size website, though somewhat
+          high minimum
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic go.Scatter with fill, but doesn't leverage Plotly's interactive
+          features like hover templates, range sliders, or annotations that would
+          enhance the visualization
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/plotnine.yaml b/plots/area-basic/metadata/plotnine.yaml
index 3c20adbe3a..625250ce98 100644
--- a/plots/area-basic/metadata/plotnine.yaml
+++ b/plots/area-basic/metadata/plotnine.yaml
@@ -22,3 +22,171 @@ review:
   weaknesses:
   - Axis labels lack units (could be "Daily Visitors (count)" or "Date (2024)")
   - Could leverage more plotnine-specific features like stat_smooth for trend visualization
+  image_description: The plot displays a basic area chart showing daily website visitors
+    over January 2024. The chart uses a blue color (#306998) with semi-transparent
+    fill (alpha ~0.4) and a solid blue line on top. The X-axis displays dates from
+    Jan 01 to Jan 29 with weekly intervals labeled (Jan 01, Jan 08, Jan 15, Jan 22,
+    Jan 29). The Y-axis shows "Daily Visitors" ranging from 0 to 8000. The title "area-basic
+    · plotnine · pyplots.ai" is centered at the top. The data shows a clear cyclical
+    weekly pattern with peaks and troughs, overlaid with an upward trend, representing
+    realistic website traffic patterns. Grid lines are subtle and horizontal only.
+    The overall layout is clean with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fill and line are appropriately visible with good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Daily Visitors" and "Date" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart with filled region below line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=datetime, Y=numeric correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill, gridlines, clear axis labels all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "area-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, cyclical pattern, and volume well; could show more variation
+          in amplitude
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a perfect real-world scenario with believable
+          patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 4000-8000 daily visitors are realistic; starting Y at 0 is
+          good but creates some empty space
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly but doesn't leverage plotnine-specific
+          features like faceting or stat transformations
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/pygal.yaml b/plots/area-basic/metadata/pygal.yaml
index c318a018a2..c0dbd4cb41 100644
--- a/plots/area-basic/metadata/pygal.yaml
+++ b/plots/area-basic/metadata/pygal.yaml
@@ -25,3 +25,177 @@ review:
   - Y-axis label Visitors could include units (e.g., Visitors count)
   - Data dots could be slightly larger for better visibility at the target resolution
   - Could leverage more pygal-specific features like custom tooltips or value formatting
+  image_description: The plot displays a basic area chart with a light blue/steel
+    blue filled area beneath the line. The title "area-basic · pygal · pyplots.ai"
+    is clearly visible at the top. The X-axis is labeled "Day of Month" with values
+    1, 5, 10, 15, 20, 25, 30 shown. The Y-axis is labeled "Visitors" with values ranging
+    from approximately 900 to 2100. Data points are marked with small dots along the
+    line. The area fill has semi-transparency (approximately 40% opacity). A legend
+    "Daily Visitors" appears at the bottom left. The background is white with subtle
+    horizontal gridlines. The chart shows website traffic patterns with weekend dips
+    and weekday peaks.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all readable. Font sizes are
+          well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. X-axis labels are spaced well by showing
+          every 5th day.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and fill are clearly visible. Dots are appropriately sized at
+          6px. Could be slightly larger for better visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) is colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with legend at bottom, proper margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Day of Month", "Visitors") but "Visitors"
+          lacks units (e.g., "count" or "per day").
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis gridlines are subtle and helpful. Legend placement is good
+          but could be more prominent.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart type using pygal.Line with fill=True.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (days) and Y (visitors) correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (0.4), gridlines, clear axis labels all present.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 30 days visible, Y-axis shows full range from ~890 to ~2150.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Daily Visitors".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "area-basic · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trends, peaks, valleys, and weekly patterns. Could show more
+          dramatic variation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website visitor data over a month is a perfect real-world scenario
+          matching the spec example.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (890-2150 visitors) are realistic for a small-medium website.
+          Range is reasonable.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean script: imports → data → plot → save. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded list), but no comment explains
+          that a random seed is therefore unnecessary.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style class for customization, fill=True for area, show_dots,
+          stroke_style. Could leverage more pygal-specific features like tooltips
+          or value formatters.
+  verdict: APPROVED
diff --git a/plots/area-basic/metadata/seaborn.yaml b/plots/area-basic/metadata/seaborn.yaml
index 1fd7a32214..184b1bb3f1 100644
--- a/plots/area-basic/metadata/seaborn.yaml
+++ b/plots/area-basic/metadata/seaborn.yaml
@@ -24,3 +24,175 @@ review:
     seaborn native capabilities (though seaborn lacks a dedicated area chart function)
   - 'Grid legend scoring: single series plots could benefit from a subtle annotation
     or data source note'
+  image_description: The plot displays a basic area chart showing website visitors
+    over time (January 2024). The chart uses a semi-transparent blue fill (#306998
+    Python Blue at alpha 0.4) with a darker blue line (linewidth 3) on top. The x-axis
+    shows dates from 2024-01-01 to 2024-01-29 with rotated labels at 45 degrees. The
+    y-axis shows visitor counts from 0 to approximately 7500. The title "area-basic
+    · seaborn · pyplots.ai" appears at the top. A clear weekly pattern is visible
+    with dips on weekends (lower traffic) and peaks mid-week. There's also an overall
+    upward trend. The gridlines are subtle with alpha 0.3 and dashed style.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at fontsize 24, axis labels at 20, tick labels at 16, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, date labels are rotated to avoid collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is optimal, area fill at alpha 0.4 shows magnitude
+          clearly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue), colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Visitors (count)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is good (alpha 0.3), but no legend present (single series, acceptable
+          but not ideal)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct area chart with filled region below line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=datetime (dates), Y=numeric (visitors) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (alpha 0.4), gridlines, clear axis labels all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0 (emphasizes area magnitude), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "area-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows weekly pattern and upward trend, demonstrates magnitude emphasis
+          well, but could show more variety (e.g., occasional anomalies)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily website visitors over a month is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 3000-7500 are realistic for website traffic, though starting
+          y at 0 adds some empty space
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Minor: Uses ax.fill_between from matplotlib instead of seaborn-native
+          approach'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot which is seaborn-native, but the area fill uses
+          matplotlib's ax.fill_between. Seaborn doesn't have a native area chart function,
+          so this hybrid approach is acceptable but doesn't showcase seaborn's distinctive
+          features like regplot, kdeplot, or statistical aggregation.
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/altair.yaml b/plots/area-stacked-percent/metadata/altair.yaml
index fd257c20a1..9458ba9a93 100644
--- a/plots/area-stacked-percent/metadata/altair.yaml
+++ b/plots/area-stacked-percent/metadata/altair.yaml
@@ -27,3 +27,181 @@ review:
     inside plot area
   - Nuclear data staying perfectly flat at 12% for all 10 years feels artificially
     constant
+  image_description: 'The plot displays a 100% stacked area chart showing energy source
+    mix evolution from 2015 to 2024. Four colored areas are stacked from bottom to
+    top: Coal (steel blue), Natural Gas (golden yellow), Nuclear (medium slate blue/purple),
+    and Renewables (sea green). The Y-axis shows "Share of Energy Mix (%)" ranging
+    from 0% to 100% with percentage formatting. The X-axis displays years from 2015
+    to 2024. The title "area-stacked-percent · altair · pyplots.ai" is centered at
+    the top. A legend on the right identifies the four energy sources with colored
+    circular markers. The chart clearly illustrates the energy transition narrative
+    with Coal declining from ~45% to ~16%, Renewables growing from ~18% to ~44%, while
+    Natural Gas and Nuclear remain relatively stable. Subtle dashed grid lines provide
+    reference. The areas have slight transparency (0.85 opacity) with visible boundary
+    lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fills are clearly visible with good opacity (0.85), boundary
+          lines enhance distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable, but the blue and purple may look
+          similar to some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with unit indication (%), X-axis just
+          "Year" without additional context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style and 0.3 opacity, BUT legend is positioned
+          outside the plot area creating extra whitespace
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (Year), Y is percentage contribution correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows percentage stacking, multiple categories, temporal progression
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 0-100% range, all years visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact required format: `area-stacked-percent · altair · pyplots.ai`'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows proportion changes over time well, demonstrates both increasing
+          and decreasing trends, but all changes are gradual (no dramatic shifts shown)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy source mix evolution is a perfect real-world scenario, neutral
+          topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages are realistic for energy mix, though nuclear staying
+          exactly flat at 12% for 10 years is slightly artificial
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` (though data is manually specified, seed
+          is set)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports are used: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding with proper stacking (`stack="normalize"`),
+          tooltips, and `interactive()` for HTML export. However, could leverage more
+          Altair-specific features like selections or more sophisticated interactivity.
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/bokeh.yaml b/plots/area-stacked-percent/metadata/bokeh.yaml
index d9c083beb6..26848da6a0 100644
--- a/plots/area-stacked-percent/metadata/bokeh.yaml
+++ b/plots/area-stacked-percent/metadata/bokeh.yaml
@@ -24,3 +24,177 @@ review:
   - Data shows relatively gradual changes; more dramatic proportion shifts would better
     demonstrate the plot type purpose
   - Legend at 36pt appears slightly smaller than optimal compared to 48pt axis labels
+  image_description: 'The plot displays a 100% stacked area chart showing market share
+    evolution from 2015 to 2024 for four products (A, B, C, D). The chart uses a colorblind-safe
+    palette: Python blue (#306998) for Product A at the bottom, golden yellow (#FFD43B)
+    for Product B, sea green (#2E8B57) for Product C, and salmon/coral (#E07B53) for
+    Product D at the top. The Y-axis shows "Market Share (%)" ranging from 0 to 100,
+    and the X-axis shows "Year" with annual tick marks. The title correctly follows
+    the format "area-stacked-percent · bokeh · pyplots.ai". The legend is positioned
+    in the top-right corner with a semi-transparent background. The stacked areas
+    sum to 100% at each time point, showing Product A growing from ~40% to ~53%, while
+    Product D shrinks from ~10% to ~7% over the decade. The grid is subtle with dashed
+    lines and low alpha. Background is a light off-white (#fafafa).'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 72pt, axis labels at 48pt, tick labels at 36pt - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area patches are clearly visible with good alpha (0.85), distinct
+          boundaries
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette with blue, yellow, green, and salmon
+          - all distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Year" and "Market Share (%)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha 0.3 is good, but legend text appears slightly small relative
+          to axis labels and could be positioned better (inside plot area takes space
+          from data visualization)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (years), Y shows percentage contributions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized to 100%, shows proportional changes over time
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100 with slight padding (105), X-axis covers all years
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match the four product categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "area-stacked-percent · bokeh · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows proportion changes over time, but data is relatively stable
+          without dramatic shifts that would better demonstrate the plot type's value
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share evolution is a perfect, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages are realistic, though all products stay relatively close
+          to initial values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and patch() which are Bokeh fundamentals, but
+          doesn't leverage Bokeh's interactive features like HoverTool which would
+          add tooltips showing exact percentages - a key Bokeh strength
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/highcharts.yaml b/plots/area-stacked-percent/metadata/highcharts.yaml
index ff5f729f6d..b7dba1037b 100644
--- a/plots/area-stacked-percent/metadata/highcharts.yaml
+++ b/plots/area-stacked-percent/metadata/highcharts.yaml
@@ -25,3 +25,181 @@ review:
     more precisely
   - 'Legend symbols use symbolRadius: 0 (squares) which is fine but circular markers
     might match the data point markers better'
+  image_description: 'The plot displays a 100% stacked area chart showing "Product
+    Market Share Evolution (2018-2025)". Three distinct colored areas fill the chart
+    from bottom to top: purple (Product C) at the bottom maintaining ~21-25%, yellow
+    (Product B) in the middle declining from ~40% to ~24%, and steel blue (Product
+    A) at the top growing from ~35% to ~55%. The Y-axis shows "Market Share (%)" with
+    values from 0% to 100% in 2% increments. The X-axis shows years from 2018 to 2025.
+    The title "area-stacked-percent · highcharts · pyplots.ai" appears at the top
+    with a subtitle below. A horizontal legend at the bottom shows all three products
+    with colored squares. Data points are marked with small squares on each area boundary.
+    The chart fills the canvas well with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable.
+          Font sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas are clearly visible with good fill opacity (0.7), markers are
+          appropriately sized (radius 8).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (blue #306998, yellow #FFD43B, purple
+          #9467BD). No red-green conflicts.'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; plot fills most of the space. Slight extra
+          whitespace at bottom due to legend placement.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Market Share (%)" with units, X-axis has "Year".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed and readable. No visible grid lines (Highcharts
+          default), which is acceptable but a subtle grid could enhance readability.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct 100% stacked area chart with `stacking: "percent"`.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows time (years), Y-axis shows percentage contributions.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows proportional changes over time, total always 100%, multiple
+          categories stacked.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis correctly shows 0-100%, all years visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Product A, B, and C.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "area-stacked-percent · highcharts · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows three trends: growing (Product A), declining (Product B),
+          and stable (Product C). Good variety but could show more dramatic crossover
+          points.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share evolution is a perfect, realistic business scenario
+          for this chart type.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible market share percentages. The data is deterministic
+          but the values chosen are sensible.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart setup → series → export.
+          No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Data is deterministic (no randomness), but there is no explicit
+          comment about seeding. Minor deduction.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of Highcharts-specific features: `stacking: "percent"`,
+          interactive tooltips with `{point.percentage:.1f}%`, proper series configuration.
+          Could leverage more advanced features like data labels or animation settings.'
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/letsplot.yaml b/plots/area-stacked-percent/metadata/letsplot.yaml
index 0460ae2fce..055a025f29 100644
--- a/plots/area-stacked-percent/metadata/letsplot.yaml
+++ b/plots/area-stacked-percent/metadata/letsplot.yaml
@@ -22,3 +22,181 @@ review:
   - Year labels display with comma separators (2,016 instead of 2016) which looks
     unnatural for year values
   - The HTML export setup_html() call is unnecessary overhead for PNG-only output
+  image_description: 'The plot shows a 100% stacked area chart displaying market share
+    evolution from 2016 to 2023 for four companies. The y-axis shows "Market Share
+    (%)" ranging from 0% to 100%, and the x-axis shows "Year" from 2016 to 2023. Four
+    colored areas are stacked: Company D (purple) at the bottom, Company C (green)
+    above it, Company B (yellow) in the middle, and Company A (blue) at the top. The
+    areas always sum to 100%. Company A (blue) shows clear growth over time (expanding
+    from ~40% to ~58%), while Company B (yellow) declines. Companies C and D maintain
+    relatively stable small shares. The title reads "area-stacked-percent · letsplot
+    · pyplots.ai". The legend is positioned on the right side. The background is a
+    subtle light gray (#FAFAFA) with light grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend text are all clearly readable
+          at the appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fills are clearly visible with good alpha (0.85), boundaries
+          between areas are clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Purple, green, yellow, and blue are colorblind-friendly and highly
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but slight imbalance with extra whitespace on the right
+          due to legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis label "Market Share (%)" is descriptive, but could arguably
+          drop "(%)" since values show percentages
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and good, but year labels show commas (e.g., "2,016"
+          instead of "2016")
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Year (continuous time), Y=Market Share percentages correctly stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: multiple categories stacked, normalized
+          to 100%, shows proportional changes over time'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible from 2016-2023, 0-100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four companies
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "area-stacked-percent · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent demonstration: shows growing market leader, declining
+          competitor, stable small players - covers all typical stacked area scenarios'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share evolution is a perfect, neutral business scenario for
+          this chart type
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, time range is realistic 8-year period, company
+          share values are plausible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: 'element_blank, element_line, element_rect are imported and used,
+          so no imports are unused; however, LetsPlot.setup_html() is called but
+          may not be needed for PNG export'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png but path parameter usage ("path='.'") is unconventional
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar with geom_area, position="fill" for 100%
+          stacking, scale_y_continuous with format for percentages. Could leverage
+          more lets-plot specific features like tooltips for interactivity.
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/matplotlib.yaml b/plots/area-stacked-percent/metadata/matplotlib.yaml
index c5481f7d77..c60fa18dc1 100644
--- a/plots/area-stacked-percent/metadata/matplotlib.yaml
+++ b/plots/area-stacked-percent/metadata/matplotlib.yaml
@@ -26,3 +26,179 @@ review:
   - X-axis label Year is generic - could be more descriptive like Year (2015-2024)
   - Library features could be enhanced with annotations showing key percentages or
     trend indicators
+  image_description: The plot displays a 100% stacked area chart showing market share
+    evolution for four companies (A, B, C, D) from 2015 to 2024. The color scheme
+    uses dark blue (#306998) for Company A at the bottom, golden yellow (#FFD43B)
+    for Company B, light blue (#4B8BBE) for Company C, and pale yellow (#FFE873) for
+    Company D at the top. The Y-axis shows "Market Share (%)" with tick marks at 0%,
+    25%, 50%, 75%, and 100%. The X-axis shows "Year" with all years from 2015-2024
+    labeled. A legend is positioned in the upper right corner with a semi-transparent
+    background. Horizontal dashed grid lines appear at 25% intervals. The title reads
+    "area-stacked-percent · matplotlib · pyplots.ai". The visualization clearly shows
+    Company A declining from ~45% to ~24%, Company B growing from ~30% to ~41%, Company
+    C steadily increasing from ~15% to ~24%, and Company D remaining stable around
+    10-11%.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text perfectly readable: title at 24pt, axis labels at 20pt,
+          tick labels at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas clearly visible with good alpha (0.85), white edge lines separate
+          layers nicely
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Color scheme uses blue/yellow palette which is colorblind-friendly,
+          good contrast between adjacent areas
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with proper margins, balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units (%), but X-axis "Year" could be more descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed, y-only), legend well placed but
+          slightly overlaps the topmost area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (years), Y variables are categories stacked correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows percentage contribution, always totals 100%, demonstrates proportional
+          changes over time
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100%, X-axis shows full data range 2015-2024
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple trends (decline, growth, steady growth, stable), but
+          could show more dramatic composition shifts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share evolution is a perfect real-world application mentioned
+          in spec, uses neutral business context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Market share percentages (10-45%) are realistic for a competitive
+          market
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is deterministic anyway
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API (stackplot, ax methods)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses standard stackplot, which is correct but not distinctive. Could
+          use annotations, fill_between customization, or other matplotlib-specific
+          features
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/plotly.yaml b/plots/area-stacked-percent/metadata/plotly.yaml
index 68d94529f4..6d713284ad 100644
--- a/plots/area-stacked-percent/metadata/plotly.yaml
+++ b/plots/area-stacked-percent/metadata/plotly.yaml
@@ -24,3 +24,174 @@ review:
     top of stack
   - Two green shades (Hydro olive and Wind bright) may be difficult to distinguish
     for colorblind users
+  image_description: 'The plot displays a 100% stacked area chart showing energy source
+    market share evolution from 2015 to 2024. Five energy sources are represented:
+    Coal (dark blue, bottom), Natural Gas (yellow/gold), Wind (bright green), Solar
+    (magenta/pink), and Hydro (olive green, top). The chart clearly shows Coal declining
+    from ~40% to ~22%, while renewable sources (Wind, Solar) grow over time. Natural
+    Gas remains relatively stable around 30-32%. The title "area-stacked-percent ·
+    plotly · pyplots.ai" is centered at the top. A horizontal legend sits above the
+    plot area. The Y-axis shows "Market Share (%)" with percentage suffixes (0%-100%),
+    and the X-axis shows "Year" (2015-2024). The layout uses a clean white background
+    with subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, ticks at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stacked areas are clearly visible with good fill colors
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but the olive green (Hydro) and bright
+          green (Wind) could be confused by some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend positioned cleanly
+          above
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Market Share (%)" with units, X-axis has "Year"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but legend order is reversed from visual
+          stacking order (Hydro shown first in legend but appears at top of stack)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (years), Y=percentages correctly stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized to 100%, shows proportional changes over time
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100%, X-axis covers all years
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 5 categories correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "area-stacked-percent · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple categories with varying trends (declining Coal, growing
+          renewables, stable Gas)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy source market share is a real, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages are realistic for energy mix evolution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses stackgroup with groupnorm="percent", hovertemplate for custom
+          tooltips, update_layout with comprehensive styling, and generates interactive
+          HTML output
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/plotnine.yaml b/plots/area-stacked-percent/metadata/plotnine.yaml
index 300f77d93b..2e8e079233 100644
--- a/plots/area-stacked-percent/metadata/plotnine.yaml
+++ b/plots/area-stacked-percent/metadata/plotnine.yaml
@@ -28,3 +28,176 @@ review:
     unnecessary
   - Could benefit from more dramatic proportion shifts to better demonstrate the plot
     type capabilities
+  image_description: 'The plot displays a 100% stacked area chart showing tech product
+    market share evolution from 2015-2024. Four categories are stacked: Laptops (coral/salmon
+    at bottom), Wearables (teal/cyan), Tablets (yellow), and Smartphones (blue at
+    top). The Y-axis ranges from 0% to 100% with clear percentage labels. The X-axis
+    shows years from 2015 to 2023 in 2-year intervals. A legend on the right identifies
+    each category as "Product Category". The title "area-stacked-percent · plotnine
+    · pyplots.ai" is prominently displayed at the top in bold. The chart shows interesting
+    trends: Smartphones declining from ~45% to ~31%, Tablets declining from ~25% to
+    ~9%, Wearables growing dramatically from ~5% to ~28%, and Laptops remaining relatively
+    stable around 25-32%.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~24pt), axis labels are clearly readable
+          (~20pt), tick labels are appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas are clearly visible with good alpha (0.85), distinct color
+          boundaries
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color contrast, colors are distinguishable but blue/teal could
+          appear too similar for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Year" and "Market Share (%)" with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.3, but legend could be better positioned
+          (appears slightly cramped)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (years), Y is percentage, fill is category
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All areas stack to 100%, shows proportional changes over time
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100% range shown, all years visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "area-stacked-percent · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rising and falling trends, stable categories, crossover points
+          - could show more dramatic shifts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech market share is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic percentages, though all values conveniently
+          sum to 100 in raw data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' but uses verbose=False which is fine
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_area, scale_fill_manual, theme customization.
+          Good use of plotnine's grammar of graphics, but could leverage more advanced
+          features like faceting or annotations
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/pygal.yaml b/plots/area-stacked-percent/metadata/pygal.yaml
index ebcbb83d7e..1ba91f8dee 100644
--- a/plots/area-stacked-percent/metadata/pygal.yaml
+++ b/plots/area-stacked-percent/metadata/pygal.yaml
@@ -25,3 +25,179 @@ review:
     visualization where the full 0-100% context matters
   - Data is somewhat artificial with perfectly linear trends and static values (Accessories
     at exactly 4% for all years)
+  image_description: 'The plot displays a 100% stacked area chart showing market share
+    evolution for five tech product categories from 2018 to 2024. The chart uses five
+    distinct colors: deep blue (Smartphones) at the bottom, golden yellow (Laptops),
+    teal green (Tablets), coral/salmon (Wearables), and purple (Accessories) at the
+    top. The stacked areas always sum to 100%, clearly showing proportional shifts
+    over time. The title "area-stacked-percent · pygal · pyplots.ai" appears at the
+    top center. The legend is positioned in the top-left corner with colored squares.
+    Y-axis shows "Market Share (%)" ranging from 40-100, and X-axis shows "Year" with
+    labels from 2018-2024. Small dots mark data points on the area boundaries. Horizontal
+    grid lines are visible at y-axis intervals.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend text are clearly readable at full
+          size; font sizes are well-scaled for 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend is separate from chart area
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas are clearly visible with good opacity (0.85); small dots mark
+          data points effectively
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable; blue/green/yellow/coral/purple palette
+          is reasonably colorblind-friendly, though blue and purple could be more
+          distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; plot fills most of the area; slight imbalance
+          with legend in top-left corner outside the main plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Market Share (%)" and "Year"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Y-axis starts at ~40 instead of 0, which is misleading for a 100%
+          stacked chart where the visual representation should show the full 0-100%
+          range
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart using pygal's StackedLine with fill=True
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows time (years), Y-axis shows percentage contribution
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows normalized percentages, multiple categories, temporal progression
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible; years 2018-2024 shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "area-stacked-percent · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple categories with changing proportions over time; demonstrates
+          the key feature of 100% normalization; slight deduction as all trends are
+          relatively smooth/linear
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech product market share is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sum to 100% as required; percentage values are plausible for
+          market share; some values (e.g., Accessories staying exactly 4% for 7 years)
+          feel slightly artificial
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's StackedLine with fill=True, custom Style, render_to_png/html;
+          could leverage more pygal-specific features like tooltips or value formatters
+  verdict: APPROVED
diff --git a/plots/area-stacked-percent/metadata/seaborn.yaml b/plots/area-stacked-percent/metadata/seaborn.yaml
index 570cb8b58b..e264c71cac 100644
--- a/plots/area-stacked-percent/metadata/seaborn.yaml
+++ b/plots/area-stacked-percent/metadata/seaborn.yaml
@@ -22,3 +22,182 @@ review:
   weaknesses:
   - Legend order (Solar, Wind, Hydro, Other top-to-bottom) is reversed from visual
     stack order (Other, Hydro, Wind, Solar bottom-to-top), which can confuse readers
+  image_description: 'The plot displays a 100% stacked area chart showing the evolution
+    of renewable energy sources market share from 2015 to 2024. The chart uses four
+    distinct colors: dark blue (Steel blue, #306998) for Solar at the top, golden
+    yellow (#FFD43B) for Wind in the middle-upper area, turquoise/teal (#4ECDC4) for
+    Hydro in the middle-lower area, and gray (#95A5A6) for Other at the bottom. The
+    Y-axis shows "Share (%)" ranging from 0 to 100, and the X-axis shows "Year" from
+    2015 to 2024. The title follows the correct format: "area-stacked-percent · seaborn
+    · pyplots.ai". The legend is positioned in the upper left with a white background.
+    The plot clearly shows Solar growing from ~10% to ~45%, Wind growing slightly
+    from ~20% to ~35%, Hydro declining from ~50% to ~18%, and Other declining from
+    ~20% to ~2%. The areas stack to exactly 100% throughout. Subtle boundary lines
+    separate each area. The grid is shown only on the Y-axis with dashed lines at
+    low alpha.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas clearly visible with good alpha (0.85), boundary lines help
+          distinguish layers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color choices with distinct hues, though yellow-on-white legend
+          could be slightly improved
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend near the data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "Share (%)", X-axis just "Year" without units (acceptable
+          for years)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (alpha 0.3), but legend order (top-to-bottom: Solar,
+          Wind, Hydro, Other) doesn''t match visual stack order (bottom-to-top: Other,
+          Hydro, Wind, Solar)'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (years), Y=percentage contributions correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized to 100%, shows proportional changes over time
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100%, X-axis shows all years 2015-2024
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all four categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: growth (Solar), steady growth (Wind), decline
+          (Hydro), sharp decline (Other) - excellent variation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Renewable energy market share is a real, neutral, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for energy market percentages
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though data is actually deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: matplotlib.patches.Patch is imported but could be avoided; all imports
+          are used but sns.lineplot usage is contrived
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current seaborn API used correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.set_theme(), sns.set_context(), and sns.lineplot(). However,
+          the main plotting (fill_between) uses matplotlib directly. Seaborn doesn't
+          have a native stacked area function, so this hybrid approach is acceptable
+          but not ideal.
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/altair.yaml b/plots/area-stacked/metadata/altair.yaml
index 7c770c8a41..2ed350fe54 100644
--- a/plots/area-stacked/metadata/altair.yaml
+++ b/plots/area-stacked/metadata/altair.yaml
@@ -24,3 +24,172 @@ review:
   weaknesses:
   - Legend symbols lack visible stroke, making color patches slightly harder to distinguish
     at a glance
+  image_description: 'The plot displays a stacked area chart showing monthly revenue
+    by product category over two years (Jan 2023 - Nov 2024). Four distinct areas
+    are stacked from bottom to top: Software (Python blue #306998), Hardware (Python
+    yellow #FFD43B), Services (teal #5D9B9B), and Support (mauve #A85C5C). The title
+    "area-stacked · altair · pyplots.ai" is centered at the top. The X-axis shows
+    "Month" with rotated date labels (e.g., "Jan 2023", "Mar 2023"), and the Y-axis
+    shows "Revenue ($ thousands)" ranging from 0 to ~300. A legend titled "Product
+    Category" appears on the right side. The chart has subtle dashed grid lines and
+    shows realistic revenue trends with variation over time.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-axis labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas are well-sized with good opacity (0.85) and visible stroke
+          lines
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, teal, mauve) are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, chart fills canvas well, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Revenue ($ thousands)", "Month"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), but legend has no stroke on symbols
+          making color identification slightly harder
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, revenue values on Y-axis, stacking correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, stacking, legend, time-based x-axis all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, baseline starts at zero
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "area-stacked · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 series with trends, but all series follow similar patterns
+          (general decline in 2024)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly revenue by product category is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in tens to hundreds of thousands are reasonable, though cumulative
+          totals ~280k could use more variation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Declarative encoding, tooltips, interactive HTML export, proper Vega-Lite
+          stack ordering
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/bokeh.yaml b/plots/area-stacked/metadata/bokeh.yaml
index c5228e7da7..3f01a2e5a9 100644
--- a/plots/area-stacked/metadata/bokeh.yaml
+++ b/plots/area-stacked/metadata/bokeh.yaml
@@ -25,3 +25,180 @@ review:
     harder to distinguish
   - Missing HoverTool which is a distinctive Bokeh feature for interactivity
   - Series not strictly ordered by size (largest at bottom) as suggested in spec Notes
+  image_description: The plot displays a stacked area chart showing monthly revenue
+    by product category (Electronics, Clothing, Home & Garden, Sports) over 24 months
+    from January 2023 to December 2024. The areas are stacked from bottom to top with
+    Sports (light yellow) at the base, then Home & Garden (light blue), Clothing (golden
+    yellow), and Electronics (dark blue/Python blue) on top. The legend is positioned
+    on the right side outside the plot area. The title "area-stacked · bokeh · pyplots.ai"
+    appears at the top left. X-axis labels show months in "Mon YYYY" format at an
+    angle, and Y-axis shows "Revenue ($K)" ranging from 0 to ~500. The plot has a
+    light gray background with dashed grid lines. The total revenue shows an upward
+    trend from ~400K to ~490K over the period.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable at full size.
+          Font sizes are appropriately scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. X-axis labels are angled to avoid collision.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Stacked areas are clearly visible with good fill alpha (0.85). Areas
+          are well-defined with clear boundaries.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but the two yellow shades (Clothing and
+          Sports) could be confused by some viewers. Not strictly red-green issue
+          but similar luminance.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with the plot filling most of the space.
+          Legend is well-placed on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Revenue ($K)", X-axis has "Month" - both descriptive.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha 0.3. Legend is well-styled
+          but placed outside the plot area which is fine.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart implementation using Bokeh varea.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis is time (months), Y values are stacked correctly.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: multiple series stacked, legend included,
+          semi-transparent fills, baseline at zero.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis extends slightly beyond max value.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify each category.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "area-stacked · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trends (Electronics growing), seasonality (Clothing with sine
+          wave pattern), and composition changes over time. Could show more dramatic
+          compositional shifts.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly revenue by product category is a realistic e-commerce scenario
+          with plausible trends.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in $K range (50-220 per category) are realistic for
+          a mid-size retailer. Total ~400-500K monthly is plausible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, bokeh components).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also saves plot.html which is appropriate
+          for Bokeh.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, varea, Legend, FixedTicker properly. Could
+          leverage more Bokeh-specific features like HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/highcharts.yaml b/plots/area-stacked/metadata/highcharts.yaml
index 598a317ac7..4d5031bb78 100644
--- a/plots/area-stacked/metadata/highcharts.yaml
+++ b/plots/area-stacked/metadata/highcharts.yaml
@@ -23,3 +23,178 @@ review:
   weaknesses:
   - Legend background color creates slight visual disconnection from the chart area
   - Grid lines could be slightly more subtle (currently visible but not distracting)
+  image_description: |-
+    The plot displays a stacked area chart showing monthly revenue by product category from January 2023 through December 2024. Four distinct colored areas are stacked vertically:
+    - **Blue (#306998)** - Electronics (largest area, top of stack)
+    - **Yellow (#FFD43B)** - Software (second from top)
+    - **Purple (#9467BD)** - Services (third from top)
+    - **Cyan (#17BECF)** - Accessories (smallest, at bottom)
+
+    The chart has clear axis labels ("Revenue ($ thousands)" on Y-axis, month labels on X-axis), a title "area-stacked · highcharts · pyplots.ai" with subtitle "Monthly Revenue by Product Category (2023-2024)", and a legend positioned in the top-right corner. The stacked areas show cumulative totals reaching peaks around 625 in Dec 2023 and 735 in Dec 2024. The baseline starts at zero and the chart effectively demonstrates both individual category contributions and overall trends.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend all clearly readable
+          at proper font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fills are clearly visible with good fillOpacity (0.75), distinct
+          boundaries
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan) - no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, but legend placement in top-right slightly crowds the
+          chart area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Revenue ($ thousands)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are subtle, but legend has white background that slightly
+          disrupts visual flow
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows months (time), Y-axis shows revenue values correctly
+          stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, legend, stacked areas,
+          baseline at zero'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range, Y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "area-stacked · highcharts · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series (4 categories), seasonal patterns, year-over-year
+          growth, relative contributions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly revenue by product category is a plausible business scenario
+          matching spec examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values in thousands (25-310K) are realistic for product categories
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code is linear but has a for loop for series creation which is acceptable,
+          some complexity with Selenium setup
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic/hardcoded data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (expected for interactive
+          library)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses Highcharts stacking, shared tooltips, and series configuration,
+          but could leverage more interactive features like data labels or export
+          options
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/letsplot.yaml b/plots/area-stacked/metadata/letsplot.yaml
index 3d4bb984f5..1d4d5f37a4 100644
--- a/plots/area-stacked/metadata/letsplot.yaml
+++ b/plots/area-stacked/metadata/letsplot.yaml
@@ -27,3 +27,178 @@ review:
     increasing grid alpha
   - Color palette includes red-green combination which may be challenging for colorblind
     users; consider using a fully colorblind-safe palette
+  image_description: 'The plot displays a stacked area chart showing monthly revenue
+    by product category over a 2-year period (Jan 2023 to Dec 2024). Four categories
+    are stacked: Electronics (blue, bottom), Clothing (yellow), Home & Garden (green),
+    and Sports (crimson/red, top). The chart uses smooth filled areas with white borders
+    between layers, showing clear seasonal patterns with peaks around early spring
+    months. The y-axis shows "Revenue (Thousands USD)" ranging from 0 to 220, and
+    the x-axis shows "Month" with five labeled time points. A legend on the right
+    identifies each category. The title "area-stacked · letsplot · pyplots.ai" appears
+    at the top in bold.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~24pt), axis labels are clearly readable
+          (~20pt), tick labels are appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fills are clearly visible with good alpha (0.85), white borders
+          between layers improve distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinct (blue, yellow, green, red) but red-green combination
+          could be problematic for some colorblind users (-1)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend is appropriately placed on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Revenue (Thousands USD)", X-axis has descriptive
+          "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are barely visible or absent on the plot; horizontal grid
+          would improve readability
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, revenue values stacked on y-axis, categories properly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has stacking, multiple series, legend, proper baseline at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0 as required
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "area-stacked · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows stacking, multiple categories, trends over time, and seasonality
+          (-1 for not showing crossover between categories which could demonstrate
+          more dynamic compositions)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Revenue by product category over 2 years is a realistic, relatable
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands USD are reasonable for revenue; however the specific
+          scale (60-220K) could be more clearly representative (-1)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used, no unused imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to `plot.png` but ggsave uses path="." which is correct (-0)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_area with position="stack", scale_fill_manual,
+          theme_minimal customization. Could leverage more lets-plot specific features
+          like tooltips or interactive capabilities for HTML output.
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/matplotlib.yaml b/plots/area-stacked/metadata/matplotlib.yaml
index e8c6859fb9..7887fe55e3 100644
--- a/plots/area-stacked/metadata/matplotlib.yaml
+++ b/plots/area-stacked/metadata/matplotlib.yaml
@@ -27,3 +27,171 @@ review:
   - Axis labels lack units
   - Could benefit from more distinct color separation between adjacent green and purple
     bands for colorblind accessibility
+  image_description: 'The plot displays a stacked area chart showing monthly website
+    visitors over 24 months (Jan 2023 to Dec 2024). Four traffic source categories
+    are stacked: Organic Search (blue, bottom), Direct (yellow), Social Media (green),
+    and Referral (purple, top). The y-axis shows Monthly Visitors ranging from 0 to
+    approximately 110,000. The title "area-stacked · matplotlib · pyplots.ai" is displayed
+    at the top. A legend in the upper left identifies the four categories. The grid
+    uses subtle dashed horizontal lines. All areas show realistic growth trends with
+    some variation over time.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stacked areas are clearly visible with good alpha (0.85)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but blue/green could be slightly more
+          distinct for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Month", "Monthly Visitors") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (months), Y=visitor counts correctly stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors, legend, stacking, semi-transparent fills
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at zero, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows stacking and cumulative totals well, but series sizes are quite
+          similar; spec suggests ordering largest at bottom which is done, but more
+          variation between series would better demonstrate the stacked nature
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic sources is a perfect real-world scenario mentioned
+          in spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Visitor counts in tens of thousands are realistic for a medium-sized
+          website
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ax.stackplot which is matplotlib's native stacked area function,
+          but doesn't leverage more advanced features like color cycling or annotations
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/plotly.yaml b/plots/area-stacked/metadata/plotly.yaml
index bf7c91e108..ae83d98072 100644
--- a/plots/area-stacked/metadata/plotly.yaml
+++ b/plots/area-stacked/metadata/plotly.yaml
@@ -28,3 +28,180 @@ review:
     demonstrate compositional changes
   - Axis labels lack units
   - Could leverage more plotly-specific features like custom hover templates
+  image_description: 'The plot displays a stacked area chart showing website traffic
+    sources over 24 months (Jan 2023 to Dec 2024). Four distinct traffic sources are
+    stacked: Organic Search (blue, at the bottom), Direct (yellow), Social Media (green),
+    and Referral (pink, at the top). The y-axis shows "Monthly Visitors" ranging from
+    0 to approximately 85,000 using "k" notation. The x-axis shows "Month" with quarterly
+    date labels. The title reads "Website Traffic Sources · area-stacked · plotly
+    · pyplots.ai" with a horizontal legend placed above the chart. All four series
+    show upward trends over time, with Organic Search being the largest contributor.
+    The chart uses semi-transparent fills with subtle grid lines on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend positioned cleanly above chart
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Areas are clearly visible with 0.7 alpha transparency, good differentiation
+          between stacked layers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, pink) provide reasonable
+          contrast, though pink/red and green together are not ideal for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, chart fills ~65% of space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Monthly Visitors" and "Month" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at 0.1 alpha, legend well placed but could benefit from
+          box/border
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart using stackgroup
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis is time (months), Y-axis is numeric values, properly stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, legend, semi-transparent
+          fills, baseline at zero'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at zero with rangemode="tozero", all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all four traffic sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Website Traffic Sources · area-stacked · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows upward trends, multiple series stacking, variation between
+          categories. Minor: all series trend upward, could show some declining series
+          for more variety'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic sources is a classic, realistic application for stacked
+          area charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in 5k-45k range are realistic for web traffic; total reaching
+          ~85k is plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (4800x2700 via 1600x900 scale=3) and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses stackgroup for proper stacking, hovermode="x unified" for interactive
+          features, HTML export. Could leverage more plotly-specific features like
+          hover templates or annotations
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/plotnine.yaml b/plots/area-stacked/metadata/plotnine.yaml
index 274f090661..5380f08de8 100644
--- a/plots/area-stacked/metadata/plotnine.yaml
+++ b/plots/area-stacked/metadata/plotnine.yaml
@@ -22,3 +22,172 @@ review:
   - Proper date handling with scale_x_date and formatted quarterly labels
   weaknesses:
   - Y-axis label could include units like (thousands) for clarity
+  image_description: 'The plot displays a stacked area chart showing website traffic
+    sources over 24 months (January 2023 to January 2025). Four colored areas are
+    stacked: Social Media (red) at the bottom, Referral (green), Direct (yellow),
+    and Organic Search (blue) at the top. The y-axis shows "Monthly Visitors" ranging
+    from 0 to approximately 65,000. The x-axis shows "Month" with quarterly date labels.
+    The title reads "area-stacked · plotnine · pyplots.ai" in bold. A legend labeled
+    "Traffic Source" appears on the right side. The chart uses a minimal theme with
+    subtle gridlines and has good visual balance.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis titles 20pt, axis text 16pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels rotated 45° to avoid collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Areas clearly visible with 0.85 alpha, good stacking visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Python blue, yellow, green, red - distinct and colorblind-friendly
+          palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend positioned on right, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Monthly Visitors" and "Month" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha 0.3, legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, visitors on y-axis, categories stacked correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, legend, ordered by size,
+          semi-transparent fills, baseline at zero'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four traffic sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "area-stacked · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows 4 series with varying trends: organic growing, social growing,
+          direct stable with fluctuation, referral stable'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic sources is a real-world application mentioned in
+          the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Visitor counts in thousands are realistic for website analytics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s grammar of graphics: ggplot + aes +
+          geom_area with position="stack", scale_fill_manual, scale_x_date, theme_minimal
+          with extensive customization'
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/pygal.yaml b/plots/area-stacked/metadata/pygal.yaml
index a8c85b9211..02735a5448 100644
--- a/plots/area-stacked/metadata/pygal.yaml
+++ b/plots/area-stacked/metadata/pygal.yaml
@@ -25,3 +25,176 @@ review:
     would be cleaner
   - Grid lines could be more subtle (current y_guides are acceptable but could use
     lower opacity)
+  image_description: 'The plot displays a stacked area chart showing monthly revenue
+    by product category over 2 years (Jan 2023 - Dec 2024). Four categories are shown:
+    Electronics (blue, bottom), Clothing (yellow), Home & Garden (green), and Books
+    (red, top). The areas are properly stacked, with the cumulative total reaching
+    peaks around 520K in December periods, showing clear holiday shopping seasonality.
+    The legend is positioned in the top-left corner with colored squares. X-axis labels
+    show month abbreviations at 45-degree rotation, Y-axis shows "Revenue ($K)". The
+    title "area-stacked · pygal · pyplots.ai" appears at the top center. The chart
+    has a white background with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend are clearly readable. Tick labels
+          are slightly small but legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels are well-spaced with
+          rotation.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stacked areas are clearly visible with good opacity (0.85).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable; blue/green could be challenging for some
+          colorblind users but overall acceptable.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, legend placement is functional.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Revenue ($K)" with units, X-axis has "Month".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend position in top-left overlaps slightly with the plot area;
+          could be better placed.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart using StackedLine with fill=True.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis is time (months), Y values are stacked revenue series.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series stacked, legend present, distinct colors, baseline
+          at zero.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match series names correctly.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "area-stacked · pygal · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation, growth trends, multiple series with different
+          patterns. Could show more dramatic differences between categories.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly retail revenue by product category is a realistic, comprehensible
+          scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in $K range are realistic for retail; holiday peaks
+          are plausible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data, no random elements.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported and used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (minor issue, PNG is primary).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's StackedLine with fill, custom Style, SVG-native rendering
+          to PNG. Could leverage more pygal features like tooltips configuration or
+          value_formatter.
+  verdict: APPROVED
diff --git a/plots/area-stacked/metadata/seaborn.yaml b/plots/area-stacked/metadata/seaborn.yaml
index e524e770eb..008f942c2c 100644
--- a/plots/area-stacked/metadata/seaborn.yaml
+++ b/plots/area-stacked/metadata/seaborn.yaml
@@ -26,3 +26,174 @@ review:
     right
   - Data series all follow similar sinusoidal patterns - more variety in trends would
     better demonstrate stacked area features
+  image_description: 'The plot displays a stacked area chart showing monthly revenue
+    by product category over a 2-year period (Feb 2023 to Feb 2025). Four product
+    categories are shown: Electronics (dark blue/Python blue, #306998) at the bottom,
+    Clothing (golden yellow, #FFD43B), Home & Garden (teal/turquoise, #4ECDC4), and
+    Sports (coral/salmon, #FF6B6B) at the top. The Y-axis shows "Revenue (Million
+    $)" ranging from 0 to ~145, and the X-axis shows months with quarterly intervals.
+    A legend titled "Product Category" is positioned in the upper left. The areas
+    show seasonal fluctuations with an overall growth trend. The baseline correctly
+    starts at zero.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels rotated 45° for clarity
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area fills are clearly visible with good alpha (0.85), distinct boundaries
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color contrast between categories, though not a standard colorblind-safe
+          palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, plot fills most of canvas, slight margin imbalance on
+          right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Revenue (Million $)", X-axis labeled "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid has alpha=0.3 (good), but legend partially overlaps with data
+          area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, revenue values stacked on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has legend, distinct colors, baseline at zero, series ordered by
+          size
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series accurately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "area-stacked · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows seasonal variation and growth trend, but all series follow
+          similar patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly revenue by product category is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values in millions are realistic for a retail business
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Only uses sns.set_theme() for styling; stackplot is from matplotlib,
+          not a seaborn function
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/altair.yaml b/plots/band-basic/metadata/altair.yaml
index 4785a13adf..6cdeb294eb 100644
--- a/plots/band-basic/metadata/altair.yaml
+++ b/plots/band-basic/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
   weaknesses:
   - Y-axis label Signal Amplitude lacks units (should include units like mV)
   - No legend or annotation explaining what the band represents (e.g. 95% CI)
+  image_description: The plot displays a band plot with a light blue semi-transparent
+    confidence band between two boundary lines, with a solid dark blue central trend
+    line running through the middle. The band shows a sinusoidal pattern with linear
+    upward growth, with amplitude ranging from approximately -3 to 10 on the y-axis.
+    The x-axis shows "Time (s)" from 0.0 to 10.0, and the y-axis shows "Signal Amplitude".
+    The confidence band appropriately widens over time (from ~1.0 at t=0 to ~3.5 at
+    t=10), demonstrating realistic uncertainty growth. The title "band-basic · altair
+    · pyplots.ai" is displayed at the top. The plot has a clean white background with
+    subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band opacity (0.3) allows visibility, line width (4) makes central
+          trend clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, 16:9 aspect ratio, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis "Signal Amplitude" is descriptive but lacks units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3) but no legend explaining what the band
+          represents
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band/area plot with central trend line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: x mapped to time, y_lower/y_upper define band, y_center defines trend
+          line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has band with semi-transparent fill, central trend line in contrasting
+          style, smooth interpolation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-color band plot (no legend needed for basic version)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "band-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows widening uncertainty over time, sinusoidal pattern with linear
+          growth - demonstrates most band plot features but could show additional
+          scenarios
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Time series with 95% confidence interval - plausible scientific/engineering
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for signal amplitude but units are abstract
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative approach with mark_area, y/y2
+          encoding for bands, layered composition (band + line), configure_* for styling
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/bokeh.yaml b/plots/band-basic/metadata/bokeh.yaml
index f12a8c611e..626b00a4b3 100644
--- a/plots/band-basic/metadata/bokeh.yaml
+++ b/plots/band-basic/metadata/bokeh.yaml
@@ -22,3 +22,171 @@ review:
   - Axis labels lack units (e.g., "Time (s)" or "Value (units)")
   - Could use Bokeh native varea() glyph instead of patch() for cleaner band implementation
   - Legend text appears slightly small relative to the canvas size
+  image_description: The plot shows a band plot with a light blue semi-transparent
+    filled region representing a 95% confidence interval. A yellow/gold center line
+    runs through the middle of the band representing the mean trend. The band starts
+    narrow on the left (around x=0) and progressively widens toward the right (x=10),
+    demonstrating growing uncertainty over time. The title "band-basic · bokeh · pyplots.ai"
+    appears at the top left. The x-axis is labeled "Time" (0-10) and y-axis is labeled
+    "Value" (approximately 1-10). A legend in the top-left corner shows "95% Confidence
+    Interval" and "Mean Trend". The background is white with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable, though legend text
+          appears slightly small relative to the high-resolution canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band and center line are clearly visible with appropriate alpha and
+          line width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue band and yellow line provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though slight whitespace imbalance on right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Time", "Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend is well-placed but could have
+          better background contrast
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (time), y_lower, y_upper, and y_center correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: semi-transparent band, center line in
+          contrasting color'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe the elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "band-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows growing uncertainty well, though could demonstrate more variation
+          in the trend
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Time series with 95% CI is plausible, generic but appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for the context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports included
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource appropriately but patch() for band is basic;
+          could use varea() for cleaner band implementation
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/highcharts.yaml b/plots/band-basic/metadata/highcharts.yaml
index 747d83b3df..5447b94206 100644
--- a/plots/band-basic/metadata/highcharts.yaml
+++ b/plots/band-basic/metadata/highcharts.yaml
@@ -27,3 +27,176 @@ review:
   - Uses raw dictionary configuration instead of highcharts-core Python library classes
     as shown in library rules
   - Legend could be slightly larger for the 4800x2700 canvas size
+  image_description: 'The plot displays a band chart with a light blue semi-transparent
+    area representing the 95% confidence interval, and a golden-yellow center line
+    showing the mean value. The visualization follows a sinusoidal pattern over time
+    (x-axis: 0-10), with values ranging approximately from 24 to 104 on the y-axis.
+    The uncertainty band visibly widens as time increases, demonstrating heteroscedastic
+    behavior. The title "band-basic · highcharts · pyplots.ai" appears at the top
+    in bold, with a subtitle "Time series with 95% confidence interval". A legend
+    in the top-right corner identifies both the confidence interval band and mean
+    value line. The background is clean white with subtle dashed grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 64px, axis labels at 48px, tick labels at 36px - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band and line clearly visible with appropriate opacity (0.3 for band),
+          line width of 6
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and high
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate margins, slight excess whitespace
+          on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Time", "Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style and 0.1 alpha, legend well-placed
+          but could be larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band/arearange chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X mapped to time, Y to value with proper upper/lower bounds
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Band with upper/lower bounds, center line, semi-transparent fill
+          (0.3 alpha)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "95% Confidence Interval" and "Mean Value"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "band-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows sinusoidal trend with heteroscedastic uncertainty (widening
+          with x), demonstrates key band plot features
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Time series with confidence interval is plausible; generic "Value"
+          label slightly reduces real-world applicability
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in reasonable 24-104 range, 50 data points appropriate for
+          smooth band
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using raw dict config instead of highcharts-core library classes
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses arearange series type correctly, but doesn't use highcharts-core
+          Python library as recommended
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/letsplot.yaml b/plots/band-basic/metadata/letsplot.yaml
index 577f6e3546..54c686963a 100644
--- a/plots/band-basic/metadata/letsplot.yaml
+++ b/plots/band-basic/metadata/letsplot.yaml
@@ -23,3 +23,177 @@ review:
   weaknesses:
   - Grid lines are solid gray rather than using subtle alpha for better visual subtlety
   - Does not leverage lets-plot distinctive interactive features or tooltips
+  image_description: The plot displays a band plot with a light blue semi-transparent
+    filled region (confidence band) spanning from approximately -2 to 9 on the y-axis,
+    covering the full x-axis range of 0 to 10 seconds. A darker blue central trend
+    line follows a sinusoidal pattern with upward linear trend (sin wave superimposed
+    on positive slope). The band width increases from left to right, illustrating
+    growing uncertainty over time. The title "band-basic · letsplot · pyplots.ai"
+    appears at the top left. Axis labels show "Time (s)" on x-axis and "Value (units)"
+    on y-axis. The background uses a minimal theme with subtle gray grid lines. All
+    text is clearly readable.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          resolution with appropriate font sizes (24pt title, 20pt labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Central line has good thickness (size=1.5), band is appropriately
+          filled with alpha=0.3
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) with good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, data fills the plot area well with appropriate
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Time (s)" and "Value
+          (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid uses solid gray lines rather than subtle alpha; no legend needed
+          but grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band plot using geom_ribbon for the filled region
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X mapped correctly, y_lower/y_upper define band, y_center shown as
+          line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: semi-transparent fill, central line in
+          contrasting style, smooth interpolation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series band plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "band-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows widening confidence interval (heteroscedasticity), sinusoidal
+          pattern with trend, 100 data points
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Time series with 95% CI is a realistic scenario (sensor data, forecasts,
+          etc.)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible range (-2 to 9), time in seconds (0-10s)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets_plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic geom_ribbon and geom_line, which are standard ggplot features.
+          Could leverage lets_plot's tooltips, interactivity, or geom_smooth for more
+          distinctive usage
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/matplotlib.yaml b/plots/band-basic/metadata/matplotlib.yaml
index c8ccb7d91a..4966aedb5d 100644
--- a/plots/band-basic/metadata/matplotlib.yaml
+++ b/plots/band-basic/metadata/matplotlib.yaml
@@ -24,3 +24,168 @@ review:
     Measurement (units)
   - Data scenario is plausible but generic - could use a more concrete real-world
     context (e.g., stock price forecast, temperature prediction)
+  image_description: The plot displays a band plot with a sinusoidal central trend
+    line (dark blue, solid) oscillating between approximately 4 and 11 on the y-axis,
+    with an overlaid semi-transparent light blue band representing the 95% confidence
+    interval. The band visibly widens as x (time) increases from 0 to 10 seconds,
+    demonstrating forecasting uncertainty that grows over time. Dashed boundary lines
+    subtly mark the upper and lower bounds. The title reads "band-basic · matplotlib
+    · pyplots.ai" in a large font at the top. Axis labels show "Time (s)" on the x-axis
+    and "Value" on the y-axis. A legend in the upper left identifies both the confidence
+    interval band and mean trend line. A subtle grid with dashed lines provides reference.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band and line clearly visible with appropriate alpha and linewidth
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Value" lacks units (could be more descriptive)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.3, legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band plot using fill_between
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time, y_lower/y_upper define band correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has band, central line, and boundary lines as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: band-basic · matplotlib · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows widening uncertainty over time (good), sinusoidal pattern shows
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Time series with confidence interval is a plausible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are sensible but somewhat generic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: fill_between is matplotlib's core feature for bands, but no advanced
+          features used
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/plotly.yaml b/plots/band-basic/metadata/plotly.yaml
index 82c9d44ef9..2b744a0a1b 100644
--- a/plots/band-basic/metadata/plotly.yaml
+++ b/plots/band-basic/metadata/plotly.yaml
@@ -23,3 +23,178 @@ review:
   - Axis labels lack units (e.g., "Time (s)" or "Value (units)")
   - Does not leverage Plotly's distinctive interactive features like custom hover
     templates showing the exact confidence bounds
+  image_description: The plot displays a band chart with a light blue semi-transparent
+    filled region representing a 95% confidence interval, and a darker blue solid
+    trend line running through the center. The band starts narrow on the left (around
+    x=0) and progressively widens toward the right (x=10), illustrating heteroscedasticity
+    (uncertainty growing over time). The title "band-basic · plotly · pyplots.ai"
+    is centered at the top in a large font. The x-axis is labeled "Time" (ranging
+    0-10), and the y-axis is labeled "Value" (ranging ~1-10). A legend in the upper-left
+    corner identifies both the "95% Confidence Interval" (band) and "Trend Line".
+    The background is clean white with subtle gray grid lines. The overall layout
+    is well-balanced with good use of the 16:9 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend text are all clearly readable
+          at full size with appropriately large fonts
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend is positioned in upper-left
+          without covering data
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band transparency (alpha 0.3) is optimal, trend line width (4) is
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue color scheme is colorblind-safe; good contrast between band
+          fill and line
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent proportions, good margins, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Time", "Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend well-placed with semi-transparent
+          background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band/area plot showing filled region between boundaries
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X mapped to time, y_lower/y_upper define band, y_center shown as
+          trend line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: band, central line, semi-transparent
+          fill, smooth interpolation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify both elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "band-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Demonstrates key band plot features: widening uncertainty (heteroscedasticity),
+          smooth trend with slight curvature (sine component)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Time series with 95% CI is plausible, but context is generic ("Time"
+          vs specific domain)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for a generic measurement scenario
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic go.Scatter with fill="toself" technique, which is standard
+          Plotly but doesn't leverage distinctive features like hover templates, rangeslider,
+          or animations
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/plotnine.yaml b/plots/band-basic/metadata/plotnine.yaml
index f1d2601092..0d40ccb1f3 100644
--- a/plots/band-basic/metadata/plotnine.yaml
+++ b/plots/band-basic/metadata/plotnine.yaml
@@ -23,3 +23,173 @@ review:
   - Axis labels lack units (e.g., "Time (days)" or "Predicted Value (units)")
   - Data could show both narrowing and widening uncertainty regions for better feature
     coverage
+  image_description: The plot shows a band plot with a semi-transparent light blue
+    (#306998 with alpha 0.3) filled region representing a 95% confidence interval.
+    A dark blue central trend line runs through the middle of the band. The x-axis
+    shows "Time" ranging from 0.0 to 10.0, and the y-axis shows "Predicted Value"
+    ranging from approximately 3 to 12. The title reads "Model Forecast with 95% Confidence
+    Interval · band-basic · plotnine · pyplots.ai". The confidence band widens as
+    time increases (heteroscedastic uncertainty), which is realistic for forecasting
+    scenarios. The background uses a minimal theme with subtle gray grid lines. Text
+    is clearly readable at all sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band with appropriate alpha (0.3), line properly visible (size=2)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with clear contrast between line and band
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, 16:9 aspect ratio with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Time" and "Predicted Value" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle with alpha 0.3, minor grid disabled, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band/ribbon plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time, y_lower/y_upper define band boundaries, y_center is trend
+          line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has semi-transparent band, central line in contrasting style, smooth
+          interpolation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series band plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Model Forecast with 95% Confidence Interval · band-basic
+          · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows heteroscedastic uncertainty (widening band), smooth trend,
+          realistic CI bounds using 1.96 multiplier. Minor: could show both narrowing
+          and widening regions'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Model forecast scenario is highly realistic and commonly used
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: '50 points is appropriate, values sensible. Minor: y-axis values
+          are unitless'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of ggplot grammar: geom_ribbon for band, aes mapping,
+          theme customization, element_text/element_line for styling'
+  verdict: APPROVED
diff --git a/plots/band-basic/metadata/seaborn.yaml b/plots/band-basic/metadata/seaborn.yaml
index 8803e5219f..a56d30a414 100644
--- a/plots/band-basic/metadata/seaborn.yaml
+++ b/plots/band-basic/metadata/seaborn.yaml
@@ -24,3 +24,162 @@ review:
   - The fill_between function is from matplotlib rather than a seaborn-specific function;
     while seaborn does not have a native band/fill_between equivalent, the implementation
     correctly uses sns.lineplot for the central line
+  image_description: The plot displays a band chart showing a 95% confidence interval
+    around a mean trend line. The band is rendered in a semi-transparent light blue
+    (#306998 with alpha 0.3), representing the confidence interval that expands over
+    time (heteroscedastic behavior). The central trend line is rendered in bright
+    yellow/gold (#FFD43B) with a linewidth of 3. The data shows a sinusoidal pattern
+    with an upward linear trend, ranging from approximately -1 to 8 on the amplitude
+    axis and 0 to 10 on the time axis. The title "band-basic · seaborn · pyplots.ai"
+    is clearly visible at the top. Axis labels "Time (s)" and "Amplitude" are well-sized.
+    A legend in the upper left corner identifies both elements. The grid uses subtle
+    dashed lines. Overall, the layout is clean and professional with excellent readability.
+  criteria_checklist:
+    visual_quality:
+      score: 40
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text perfectly readable: title 24pt, labels 20pt, ticks 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Band with alpha=0.3 and line with linewidth=3 are perfectly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent proportions and whitespace distribution
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Time (s)", "Amplitude"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.3, dashed), well-placed legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct band plot using fill_between
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: x, y_lower, y_upper, y_center all correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Band with transparency, central line in contrasting color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately describes "95% Confidence Interval" and "Mean Trend"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: band-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows expanding uncertainty over time (heteroscedasticity), demonstrates
+          band behavior excellently
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Time series with 95% CI bounds is a realistic scientific application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible amplitude and time values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-3d/metadata/altair.yaml b/plots/bar-3d/metadata/altair.yaml
index 6a4bcaf150..def526fd44 100644
--- a/plots/bar-3d/metadata/altair.yaml
+++ b/plots/bar-3d/metadata/altair.yaml
@@ -25,3 +25,172 @@ review:
     correspond to actual sales values
   - Quarter labels positioning could be clearer - they overlap with depth axis indicator
   - Spec suggests semi-transparent bars for revealing occluded bars which is not implemented
+  image_description: The plot displays a 3D isometric bar chart showing quarterly
+    sales data across four products (Product A, B, C, D). Each bar is rendered with
+    three faces (front, top, side) to create a 3D effect, using the viridis color
+    scale to encode sales values from ~56K to 200K. The bars are arranged in a grid
+    with products on the x-axis and quarters (Q1-Q4) progressing into the depth dimension.
+    The title reads "bar-3d · altair · pyplots.ai" with a subtitle "Quarterly Sales
+    by Product (Isometric 3D Projection)". A color legend for "Sales ($K)" is positioned
+    on the right. Quarter labels (Q1-Q4) are visible on the right side of the chart,
+    and an angled "Quarters →" indicator is shown. Product B has the highest bar (yellow,
+    ~200K), while Product C shows the lowest values (purple/blue tones).
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable; tick labels slightly dense
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 3D bar faces are visible; some occlusion but depth ordering is correct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; slight imbalance with more space on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis shows "Sales Revenue (Relative Height)" but the actual scale
+          values don't map directly to sales amounts
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend well placed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements 3D bar chart using isometric projection
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (products), Y (quarters as depth), Z (sales as height) correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has color gradient for z-values; interactivity enabled; minor: no
+          transparency for occluded bars'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows sales scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title format correct; subtitle adds context but y-axis label is misleading
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across products and quarters; seasonal patterns and
+          growth trends visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values ($56K-$200K) are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure with imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of mark_rect, tooltips, and interactive(); could better
+          leverage Altair's declarative grammar for the layering
+  verdict: APPROVED
diff --git a/plots/bar-3d/metadata/bokeh.yaml b/plots/bar-3d/metadata/bokeh.yaml
index c802f43f5a..b1e040d0f3 100644
--- a/plots/bar-3d/metadata/bokeh.yaml
+++ b/plots/bar-3d/metadata/bokeh.yaml
@@ -23,3 +23,184 @@ review:
   - Realistic business scenario with meaningful variation in data
   weaknesses:
   - Title format uses hyphens/dashes instead of middle dots as required by SC-06
+  image_description: 'The plot displays a 3D bar chart showing quarterly sales data
+    across 5 products (A through E) over 4 quarters (Q1-Q4). The visualization uses
+    an isometric projection with bars rendered as 3D rectangular prisms. Colors are
+    mapped using the Viridis palette, ranging from dark blue (low values ~45) through
+    green to bright yellow (high values ~130). Product C has the tallest bars (yellow/green,
+    ~120-130 range), Product D has the shortest (dark blue, ~45-60 range). The chart
+    includes: three axes (Products, Quarters, and implied Z for height), clear product
+    labels along one axis, quarter labels along another, a color bar on the right
+    showing "Sales (thousands)" with scale from 0-130, and a title "Quarterly Sales
+    by Product - bar-3d - bokeh - pyplots.ai". The bars have semi-transparency (alpha=0.75)
+    with subtle edge lines, and face shading to give 3D depth perception. Background
+    is light gray with subtle dotted grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title 44pt, axis titles 32pt, labels 26pt - all clearly readable.
+          Minor: title uses hyphen not middle dot'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels well-spaced along projected axes
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars clearly visible with good 3D shading; semi-transparency reveals
+          occluded bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; color bar positioned
+          appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Products" and "Quarters" labels present; color bar has "Sales (thousands)"
+          with units. Deducting 1 pt for no explicit Z-axis label showing units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.2, dashed), but colorbar title font could
+          be more readable
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D bar chart with bars on 2D grid rising into third dimension
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Products, Y=Quarters, Z=Sales height correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Color gradient reinforces z-values, semi-transparency reveals hidden
+          bars, proper category counts (5x4)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, scale 0-130 appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color bar correctly shows sales scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: false
+        comment: Uses "Quarterly Sales by Product - bar-3d - bokeh - pyplots.ai" instead
+          of required "bar-3d · bokeh · pyplots.ai" format (uses hyphen-dash,
+          not middle dot)
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variation: high performer (Product C ~120-130), low performer
+          (Product D ~45-60), mixed performers, quarterly trends'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business sales scenario with products and quarters is neutral and
+          plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 45-130 (thousands) are realistic for quarterly product
+          sales
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → projection logic →
+          plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: 'Most imports are used: ColorBar is built from the LinearColorMapper,
+          and Range1d and CDN are both used. Minor: the import list could be
+          simplified'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh 3.8 API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (both correct for Bokeh)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Bokeh's patch(), ColorBar, LinearColorMapper, custom Label annotations,
+          export_png for static output plus save() for interactive HTML
+  verdict: APPROVED
diff --git a/plots/bar-3d/metadata/highcharts.yaml b/plots/bar-3d/metadata/highcharts.yaml
index 9e554461b0..aee0097e0f 100644
--- a/plots/bar-3d/metadata/highcharts.yaml
+++ b/plots/bar-3d/metadata/highcharts.yaml
@@ -25,3 +25,183 @@ review:
     could help
   - The interactive HTML version could include drag-to-rotate functionality for better
     exploration
+  image_description: 'The plot displays a 3D bar chart with 5 product categories (Laptop,
+    Tablet, Phone, Monitor, Keyboard) on the x-axis extending into depth, with quarterly
+    data (Q1-Q4) represented as separate colored bars along the z-axis. The y-axis
+    shows Revenue ($K) from 0 to 260. The bars use a colorblind-safe palette: blue
+    (#306998) for Q1, yellow (#FFD43B) for Q2, purple (#9467BD) for Q3, and cyan (#17BECF)
+    for Q4. The title "bar-3d · highcharts · pyplots.ai" appears at the top in bold,
+    with a subtitle showing the data context. The 3D perspective is applied with a
+    moderate viewing angle (alpha=15, beta=20), creating depth perception. A vertical
+    legend in the upper-right corner identifies each quarter. The Phone category shows
+    the tallest bars (especially Q2 at ~235), while Keyboard shows the shortest values
+    (~30-50). The 3D effect shows bars arranged in depth with proper occlusion.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and subtitle readable, axis labels clear, but y-axis title
+          "Revenue ($K)" is slightly rotated and could be clearer
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized for the 3D view; some bars in the back are partially
+          occluded by taller front bars (expected in 3D but reduces visibility slightly)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette with blue, yellow, purple, and
+          cyan
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with plot filling ~60% of area; slight imbalance
+          with legend far right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive axis labels with units ("Revenue ($K)", "Product Category",
+          "Quarter")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed and readable; however, the z-axis "Quarter" label
+          appears at bottom right and is somewhat isolated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D bar chart with proper depth rendering
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=products, Y=revenue, Z=quarters correctly mapped as per spec
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3D depth, categorical x/y dimensions,
+          numeric height values'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible (0-260 covers all values)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Q1-Q4
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-3d · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across products and quarters with realistic seasonal
+          patterns; could show more extreme variation between products
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales revenue by product category is a realistic and neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in $K are sensible (30-235K); some products have similar
+          heights making comparison harder
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → series →
+          export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic output
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts_core, selenium, urllib, tempfile,
+          time, Path)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png correctly, but uses unnecessary underscore-prefixed
+          loop variables "_product" and "_quarter"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Highcharts 3D module (options3d, alpha, beta, depth,
+          viewDistance), but could leverage more interactivity features in the HTML
+          version like drag rotation
+  verdict: APPROVED
diff --git a/plots/bar-3d/metadata/letsplot.yaml b/plots/bar-3d/metadata/letsplot.yaml
index 646d53cf84..1387786dd4 100644
--- a/plots/bar-3d/metadata/letsplot.yaml
+++ b/plots/bar-3d/metadata/letsplot.yaml
@@ -22,3 +22,176 @@ review:
   weaknesses:
   - Product labels on left side appear stacked vertically rather than aligned with
     their corresponding bar rows in the 3D space
+  image_description: The plot displays a 3D bar chart with an isometric perspective.
+    The title "bar-3d · letsplot · pyplots.ai" appears in bold at the top left. Twenty
+    3D bars are arranged in a 5x4 grid representing quarterly sales (Q1-Q4) across
+    five product categories (Electronics, Clothing, Home, Sports, Books). Each bar
+    uses the viridis color scale (purple→teal→green→yellow) to encode revenue values
+    ranging from ~15M to 65M. The bars have three visible faces (front, top, right)
+    with dark outlines creating clear 3D definition. A subtle gray floor grid provides
+    depth reference. Product labels are stacked vertically on the left, quarter labels
+    (Q1-Q4) appear along the front edge, and "Revenue ($M)" labels the z-axis. A color
+    legend on the right maps the gradient to revenue values.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, labels at 11-12pt are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All labels well separated, no text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars have good size and 0.85 alpha, depth sorting works well; minor
+          deduction for some back bars partially occluded
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is colorblind-safe with excellent perceptual uniformity
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight asymmetry with labels on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Revenue ($M)" has units, descriptive category labels'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Floor grid is subtle at alpha 0.5, legend well placed; however axis
+          lines are somewhat hidden
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D bar chart with bars rising from a 2D plane
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=quarters, Y=products, Z=revenue correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two categorical dimensions, numeric height, color gradient for depth
+          perception
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 bars fully visible with no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color legend accurately shows Revenue ($M) range
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"bar-3d · letsplot · pyplots.ai" matches required format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across quarters and products, Electronics Q4 peak
+          at 65M, Books consistently lower; could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a realistic business analytics
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values 15-65M are plausible; good variation but range could
+          be wider
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → projection math → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (pandas, numpy, lets_plot components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (HTML is a bonus, not a defect)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_polygon, layer_tooltips for interactivity, scale_fill_viridis,
+          theme customization; could better utilize lets-plot's native grammar of
+          graphics features
+  verdict: APPROVED
diff --git a/plots/bar-3d/metadata/matplotlib.yaml b/plots/bar-3d/metadata/matplotlib.yaml
index e7725d89f2..e671b9ed47 100644
--- a/plots/bar-3d/metadata/matplotlib.yaml
+++ b/plots/bar-3d/metadata/matplotlib.yaml
@@ -24,3 +24,173 @@ review:
   - Grid styling could use alpha parameter for subtler appearance
   - Could benefit from slightly more contrast in bar heights (all values in 65-225
     range, some near-zero values would show more variation)
+  image_description: The plot shows a 3D bar chart with 5 products (Product A through
+    E) on the x-axis, 4 quarters (Q1-Q4) on the y-axis, and sales values (ranging
+    from ~65 to ~225) on the z-axis. The bars use a viridis colormap where purple/dark
+    blue represents lower values (~65-100), teal/green for mid-range values (~120-160),
+    and yellow for higher values (~200-225). The viewing angle is set at elevation
+    25° and azimuth 45°, providing good visibility of most bars. A colorbar on the
+    right reinforces the sales values. The bars have semi-transparent faces (alpha=0.85)
+    with dark gray edges. All axis labels are clear and readable, with "Product" and
+    "Quarter" labels and "Sales (thousands $)" on the z-axis.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 14-16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars well-sized and visible, slight occlusion of some back bars but
+          transparency helps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, colorbar slightly increases
+          right margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales (thousands $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid alpha setting (default grid), colorbar serves as legend substitute
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Products on x, quarters on y, sales as height correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 3D bars, color gradient for depth perception, semi-transparency as
+          spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, z-axis range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled with units
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bar-3d · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across products and quarters, different growth patterns,
+          but all values are positive (no contrasting negative/near-zero)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales by product and quarter is a classic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in thousands of dollars (65-225) are realistic for quarterly
+          product sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set, though data is actually hardcoded
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used, both required
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses bar3d which is matplotlib's standard 3D plotting, but doesn't
+          use advanced features like annotations, custom lighting, or multiple subplots
+          for rotation views
+  verdict: APPROVED
diff --git a/plots/bar-3d/metadata/plotly.yaml b/plots/bar-3d/metadata/plotly.yaml
index ccc399cb43..5fb4fe6567 100644
--- a/plots/bar-3d/metadata/plotly.yaml
+++ b/plots/bar-3d/metadata/plotly.yaml
@@ -24,3 +24,173 @@ review:
   - Legend marker style (circular) does not match the bar chart style
   - Some minor occlusion of back rows inherent to 3D visualization could be mitigated
     with more transparency
+  image_description: The plot displays a 3D bar chart showing quarterly sales data
+    across 5 product categories (Electronics, Clothing, Food, Home, Sports). The bars
+    rise from a 2D plane with product categories on the x-axis and quarters (Q1-Q4)
+    on the y-axis. Bar heights represent sales values in thousands of dollars (z-axis
+    labeled "Sales ($K)"). Colors range from dark blue (Electronics) through lighter
+    blues (Clothing, Food) to yellow tones (Home, Sports), following a Python-inspired
+    color scheme. The title "bar-3d · plotly · pyplots.ai" is centered at the top.
+    A legend in the upper right identifies each product category. The 3D perspective
+    clearly shows Electronics having the highest sales overall with a notable Q4 holiday
+    boost pattern visible across categories.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, tick labels slightly small but
+          acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized, some occlusion in back rows but acceptable for
+          3D
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units where appropriate
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed, grid subtle but background could be lighter
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D bar chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=products, Y=quarters, Z=sales correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two categorical dimensions, color differentiation, 3D perspective
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, z-axis shows 0-150 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all product categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: bar-3d · plotly · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation across products and quarters, Q4 seasonal boost visible,
+          but could show more dramatic variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales data by product category and quarter is a realistic business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in $10K-$160K range are plausible for quarterly sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, plotly.graph_objects)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Mesh3d for proper 3D bars, hover templates, interactive HTML
+          export. Could leverage Plotly animation or more interactive features.
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/altair.yaml b/plots/bar-basic/metadata/altair.yaml
index c30857b508..ab6d86e6a1 100644
--- a/plots/bar-basic/metadata/altair.yaml
+++ b/plots/bar-basic/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
   - Image dimensions (4500x2400) slightly below target specification (4800x2700);
     should use width=1600 for exact match
   - No value labels on or above bars as suggested in the specification Notes section
+  image_description: 'The plot displays a vertical bar chart showing product sales
+    by category. Seven blue bars (#306998) with subtle rounded top corners are arranged
+    in descending order from left to right. Categories shown are: Electronics (~$45,200),
+    Clothing (~$32,100), Home & Garden (~$28,400), Sports (~$21,800), Books (~$18,500),
+    Toys (~$15,200), and Food (~$12,300). The Y-axis displays "Sales ($)" with currency
+    formatting from $0 to $50,000. X-axis labels are rotated -45 degrees and show
+    "Product Category". The title reads "bar-basic · altair · pyplots.ai". A subtle
+    dashed grid is visible in the background. All text is clearly readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes (title 28pt, labels 22pt, ticks 18pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; the -45 degree angle on x-axis labels prevents
+          overlap effectively
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing, easily distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales ($)" which is descriptive but unit is in the label
+          not separate; X-axis has "Product Category"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid has appropriate opacity (0.3) with dashed styling, but no legend
+          present (not strictly required for single-series bar chart, but could add
+          context)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Consistent bar widths, single color, adequate spacing present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bar-basic · altair · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good variation in values; bars sorted by height shows ranking
+          well; could benefit from value labels as suggested in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by retail category is a real, comprehensible scenario
+          matching spec examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values ($12K-$45K) are realistic for retail category data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html correctly, but dimensions (1500×800
+          × 3 = 4500×2400) slightly below target (4800×2700)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding, tooltips, sort parameter, and
+          cornerRadius styling; could leverage more interactive features like selection
+          or conditional encoding
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/bokeh.yaml b/plots/bar-basic/metadata/bokeh.yaml
index 5d0926f544..1cc63a20a2 100644
--- a/plots/bar-basic/metadata/bokeh.yaml
+++ b/plots/bar-basic/metadata/bokeh.yaml
@@ -23,3 +23,164 @@ review:
   weaknesses:
   - Data values are monotonically decreasing which does not showcase varied comparisons
     typical in real bar charts
+  image_description: The plot displays a vertical bar chart showing product sales
+    by category. Six blue bars (#306998) represent Electronics ($42,500), Clothing
+    ($31,200), Home & Garden ($28,700), Sports ($19,800), Books ($15,400), and Toys
+    ($12,600). Value labels appear above each bar. The title "bar-basic · bokeh ·
+    pyplots.ai" is centered at the top. The x-axis is labeled "Product Category" and
+    the y-axis "Sales ($)". A subtle dashed horizontal grid aids value reading. The
+    y-axis correctly starts at 0. Bars have consistent widths with adequate spacing
+    between them.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 24pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good alpha (0.9), clear visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind concerns
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight right margin imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Category" and "Sales ($)" with unit'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is good (alpha 0.3, dashed), but no legend needed for single-series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values as bar heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Value labels present, consistent bar widths, single color, adequate
+          spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data, starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series bar chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in values across categories, but data is monotonically
+          decreasing (could show more varied pattern)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in $12K-$42K range are realistic for category-level
+          retail data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), no random elements, but could
+          include seed comment for clarity
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/highcharts.yaml b/plots/bar-basic/metadata/highcharts.yaml
index c8abc8bee8..24ea770d76 100644
--- a/plots/bar-basic/metadata/highcharts.yaml
+++ b/plots/bar-basic/metadata/highcharts.yaml
@@ -24,3 +24,172 @@ review:
     more subtle)'
   - Uses raw dict configuration instead of highcharts-core Python library classes
   - Bar height variation could be more pronounced to better demonstrate ranking differences
+  image_description: 'The plot displays a vertical bar chart (column chart) with 6
+    blue bars (#306998) on a white background. The title "bar-basic · highcharts ·
+    pyplots.ai" is displayed at the top center. The x-axis shows product categories:
+    Electronics, Clothing, Home & Garden, Sports, Books, and Toys. The y-axis shows
+    "Sales (Units)" ranging from 0 to 4500. Each bar has a data label showing the
+    exact value (4,200, 3,100, 2,800, 2,400, 1,900, 1,500). The bars are well-spaced
+    with subtle rounded corners. Grid lines are visible on the y-axis with a light
+    gray color. The layout is clean with good margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and data labels are all clearly
+          readable at the high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate spacing and visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace on top margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Category" and "Sales (Units)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend disabled (appropriate for single series), but grid lines could
+          be more subtle (currently too prominent)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar/column chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values as bar heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Consistent bar widths, value labels on bars, single color, adequate
+          spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis properly shows 0 to 4500, displaying all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 6 categories with varying values, demonstrates ranking capability,
+          but could show more variation in bar heights
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a real, comprehensible retail scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values (1,500-4,200 units) are realistic for retail
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → config → HTML → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses dict-based config instead of highcharts-core library classes
+          (works but differs from library guide)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts-specific features like dataLabels, borderRadius,
+          pointPadding, but doesn't use the highcharts-core Python library as recommended
+          in library rules
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/letsplot.yaml b/plots/bar-basic/metadata/letsplot.yaml
index 8f8f2d0119..0b9ef67153 100644
--- a/plots/bar-basic/metadata/letsplot.yaml
+++ b/plots/bar-basic/metadata/letsplot.yaml
@@ -23,3 +23,166 @@ review:
   weaknesses:
   - Could use scale_y_continuous with labels parameter for formatted y-axis tick values
     (currently shows raw numbers like 45,000 instead of $45K)
+  image_description: 'The plot displays a vertical bar chart showing sales data for
+    6 product categories: Electronics ($45,200), Clothing ($32,800), Home & Garden
+    ($28,500), Sports ($21,300), Books ($18,900), and Toys ($15,600). All bars use
+    a consistent blue color (#306998) with dollar-formatted value labels positioned
+    above each bar. The x-axis labels are rotated 45 degrees for readability. The
+    plot uses a minimal theme with subtle horizontal gridlines (no vertical gridlines).
+    The title "bar-basic · letsplot · pyplots.ai" is centered at the top. The y-axis
+    shows "Sales ($)" and x-axis shows "Product Category".'
+  criteria_checklist:
+    visual_quality:
+      score: 40
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text perfectly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-labels avoid collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with appropriate width (0.6)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions and margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales ($)", "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle horizontal grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Value labels, consistent bar widths, single color as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within y-axis limits (0-55000)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-color chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct: "bar-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 6 varied categories with descending values demonstrating comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Real scenario: product sales by category with plausible values'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic sales values ($15K-$45K range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar, geom_text with label_format for currency,
+          theme_minimal, custom theme elements
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/matplotlib.yaml b/plots/bar-basic/metadata/matplotlib.yaml
index c1f590dead..f761cce6c5 100644
--- a/plots/bar-basic/metadata/matplotlib.yaml
+++ b/plots/bar-basic/metadata/matplotlib.yaml
@@ -23,3 +23,171 @@ review:
     arrangement showing natural category variation
   - Could use matplotlib FuncFormatter for y-axis tick labels to show dollar signs
     consistently
+  image_description: 'The plot displays a vertical bar chart showing product sales
+    by category. Six steel-blue bars (#306998) represent different product categories:
+    Electronics ($45,200), Clothing ($32,800), Home & Garden ($28,500), Sports ($21,300),
+    Books ($18,900), and Toys ($15,600). Each bar has a dollar-formatted value label
+    positioned above it. The y-axis shows "Sales ($)" ranging from 0 to ~45,000, and
+    the x-axis shows "Product Category". The title follows the correct format: "bar-basic
+    · matplotlib · pyplots.ai". The plot has a clean design with subtle horizontal
+    grid lines, no top/right spines, and consistent bar widths with adequate spacing.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, value annotations
+          at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all category labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good width (0.6), appropriate spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned, good use of space with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales ($)" and "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3) and limited to the y-axis, which is appropriate;
+          no legend is needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values as bar heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Value labels on bars, consistent bar widths, single color, adequate
+          spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data properly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series bar chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 6 categories with clear value differences, though data is monotonically
+          decreasing which is less interesting than mixed ordering
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a plausible, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in thousands are realistic for category comparisons
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no random seed comment to
+          indicate intentional reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of Axes methods, spines control, and annotation, but could
+          leverage more matplotlib-specific features like custom formatters
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/plotly.yaml b/plots/bar-basic/metadata/plotly.yaml
index ca3de2a8dd..b0767ff7a9 100644
--- a/plots/bar-basic/metadata/plotly.yaml
+++ b/plots/bar-basic/metadata/plotly.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/plotnine.yaml b/plots/bar-basic/metadata/plotnine.yaml
index 4a04ea2277..97b9813005 100644
--- a/plots/bar-basic/metadata/plotnine.yaml
+++ b/plots/bar-basic/metadata/plotnine.yaml
@@ -25,3 +25,172 @@ review:
     setting
   - Could use scale_y_continuous to add dollar formatting to y-axis tick labels for
     consistency with value labels
+  image_description: 'The plot displays a vertical bar chart showing sales data across
+    6 product categories. The bars are rendered in a consistent blue color (#306998).
+    The x-axis shows "Product Category" with labels: Books, Clothing, Electronics,
+    Home & Garden, Sports, and Toys. The y-axis shows "Sales ($)" ranging from 0 to
+    approximately 45,000. Each bar has a value label positioned above it showing the
+    exact sales figure with dollar formatting (e.g., $45,200, $32,800, etc.). The
+    title reads "bar-basic · plotnine · pyplots.ai" in the correct format. The plot
+    uses a clean minimal theme with a light gray background and subtle gridlines.
+    The layout is well-balanced with a 16:9 aspect ratio.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear and separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate width (0.7), clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, 16:9 aspect ratio, balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales ($)" and "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed, but grid is default plotnine style (could be more
+          subtle)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Consistent bar widths, value labels above bars, single color, adequate
+          spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range from 0 to beyond max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color bar chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 6 categories with varying heights, good range of values, but
+          all positive (no variation in direction)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sales values are realistic ($12K-$45K range), though slightly round
+          numbers
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no explicit seed for any randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_bar + geom_text, theme_minimal,
+          but could leverage more plotnine features like scale_y_continuous for formatting
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/pygal.yaml b/plots/bar-basic/metadata/pygal.yaml
index fbecd983b7..7324411902 100644
--- a/plots/bar-basic/metadata/pygal.yaml
+++ b/plots/bar-basic/metadata/pygal.yaml
@@ -22,3 +22,170 @@ review:
   - Grid lines could be more subtle (currently at default opacity)
   - Font sizes in the style are larger than library defaults but tick labels could
     be slightly larger for optimal legibility
+  image_description: 'The plot displays a vertical bar chart showing "Quarterly sales
+    by product category" with 6 categories: Electronics ($45,200), Clothing ($32,800),
+    Home & Garden ($28,500), Sports ($19,700), Books ($15,300), and Toys ($12,400).
+    All bars use a consistent Python Blue color (#306998). Value labels are positioned
+    on top of each bar with dollar formatting. The title "bar-basic · pygal · pyplots.ai"
+    appears at the top. The x-axis is labeled "Category" and y-axis "Sales ($)" with
+    horizontal grid lines. The layout is clean with good proportions and adequate
+    spacing between bars.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and value labels are all clearly readable; tick
+          labels slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales ($)", "Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend shown (appropriate since single series), but grid could
+          be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values as bar heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Value labels present, consistent bar widths, adequate spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at $0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly hidden for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in values, clear ranking; could benefit from more
+          contrast between adjacent values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a plausible retail scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for quarterly sales; range could be slightly
+          wider for more visual impact
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → chart → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses custom Style, value_formatter, print_values; could leverage
+          more pygal-specific interactivity features
+  verdict: APPROVED
diff --git a/plots/bar-basic/metadata/seaborn.yaml b/plots/bar-basic/metadata/seaborn.yaml
index 062778b0d9..71307e2425 100644
--- a/plots/bar-basic/metadata/seaborn.yaml
+++ b/plots/bar-basic/metadata/seaborn.yaml
@@ -23,3 +23,160 @@ review:
     capabilities
   - Does not leverage seaborn-specific features beyond basic barplot (e.g., error
     bars, hue encoding for highlighting)
+  image_description: 'The plot displays a vertical bar chart showing product sales
+    by category. Six blue bars (#306998) represent categories: Electronics (145),
+    Clothing (98), Home & Garden (76), Sports (112), Books (54), and Toys (89). Bold
+    value labels appear above each bar. The title "bar-basic · seaborn · pyplots.ai"
+    is positioned at the top. The y-axis shows "Sales (units)" and x-axis shows "Product
+    Category". The chart has a clean design with removed top/right spines and subtle
+    dashed gridlines on the y-axis. All text is clearly legible and well-sized.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, value labels 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths appropriate, good spacing between bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales (units)", "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but no legend needed for single-color
+          bars
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct vertical bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Value labels on bars, consistent bar widths, adequate spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to beyond max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, single color used appropriately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in bar heights, but all values in similar range (54-145)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for unit sales (54-145 units)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Deterministic data (no randomness), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, pandas, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/altair.yaml b/plots/bar-categorical/metadata/altair.yaml
index bbd8993339..ee67e962a8 100644
--- a/plots/bar-categorical/metadata/altair.yaml
+++ b/plots/bar-categorical/metadata/altair.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Category frequency distribution could show more extreme variation to better demonstrate
     the counting feature (e.g., one very rare category)
+  image_description: 'The plot displays a vertical bar chart showing retail transaction
+    counts across 6 product categories. The bars are rendered in a pleasant blue color
+    (#306998) with subtle rounded corners at the top. Categories are displayed on
+    the x-axis with labels angled at -30 degrees for readability: Electronics (42),
+    Clothing (34), Home & Garden (27), Books (19), Sports (16), and Toys (12). The
+    bars are sorted in descending order by count. The y-axis shows "Number of Transactions"
+    ranging from 0 to 44. The title "bar-categorical · altair · pyplots.ai" appears
+    at the top center. The layout is clean with subtle gray gridlines and good use
+    of canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels angled at -30° prevents overlap, all text readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, clearly visible, appropriate for 6 categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, plot fills appropriate
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Product Category" and "Number of Transactions"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3) and no legend is needed for single-color
+          bars (no deduction intended - this is actually fine)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart for categorical count data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, counts on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic count aggregation using `count():Q`, sorted by count descending
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to slightly above max count
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series chart, tooltips provided instead
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across categories with realistic distribution,
+          but could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Retail transactions by product category is a believable, neutral
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 150 transactions across 6 categories with realistic weighted distribution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair declarative syntax: `count():Q` aggregation,
+          `:N` encoding type, `sort="-y"`, tooltips, `cornerRadius` styling, `configure_axis`
+          and `configure_view`'
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/bokeh.yaml b/plots/bar-categorical/metadata/bokeh.yaml
index 45044f1398..afa53f6df4 100644
--- a/plots/bar-categorical/metadata/bokeh.yaml
+++ b/plots/bar-categorical/metadata/bokeh.yaml
@@ -21,3 +21,170 @@ review:
   weaknesses:
   - Missing HoverTool to display exact counts on hover (Bokeh's key interactive feature)
   - Could benefit from value labels on top of bars for static PNG output
+  image_description: The plot displays a vertical bar chart showing programming language
+    popularity based on survey responses. There are 7 bars representing Python (~153),
+    JavaScript (~118), Java (~74), C++ (~48), Rust (~39), Go (~37), and TypeScript
+    (~30). The bars are rendered in a muted blue color (#306998) with darker blue
+    outlines. The title "bar-categorical · bokeh · pyplots.ai" appears at the top
+    center. The x-axis is labeled "Programming Language" and shows category names
+    horizontally (no rotation). The y-axis is labeled "Number of Responses" with values
+    from 0 to 160. The background is light gray (#fafafa) with subtle dashed horizontal
+    grid lines. Bars are sorted in descending order by count.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels are well-spaced horizontally
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good width (0.7), visible borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no color distinction needed for this plot type
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (counts don't need units, acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle dashed grid (alpha 0.3), no legend needed; x-grid correctly
+          disabled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart for categorical count data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic counting from raw data, sorted by count descending
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series bar chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · bokeh · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows frequency distribution with varied counts; could show more
+          dramatic variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language survey is a realistic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 500 responses with 7 categories is reasonable; counts are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and vbar correctly, but doesn't leverage Bokeh's
+          interactive features (HoverTool with count display would enhance this)
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/highcharts.yaml b/plots/bar-categorical/metadata/highcharts.yaml
index fb205268ac..917a5372af 100644
--- a/plots/bar-categorical/metadata/highcharts.yaml
+++ b/plots/bar-categorical/metadata/highcharts.yaml
@@ -23,3 +23,172 @@ review:
   - X-axis title is partially cut off at the bottom of the image despite marginBottom/spacingBottom
     settings
   - Image height is 2561px instead of the specified 2700px (browser viewport issue)
+  image_description: 'The plot displays a vertical column/bar chart with a white background.
+    The title "bar-categorical · highcharts · pyplots.ai" appears at the top in bold
+    black text, with a subtitle "Product Category Purchase Frequency" below it. Six
+    blue (#306998) bars represent different product categories: Electronics (130),
+    Clothing (93), Books (86), Home & Garden (78), Sports (58), and Toys (55). Each
+    bar has a data label showing its count value. The x-axis is labeled "Product Category"
+    and shows all category names clearly. The y-axis is labeled "Count" and ranges
+    from 0 to 140 with gridlines. The bars are sorted in descending order by count.
+    The bars have slightly rounded corners (borderRadius: 4).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 48px, axis titles at 36px, labels at 32px - all highly readable.
+          Slight deduction as tick labels could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere; categories well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998 blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall but x-axis title "Product Category" is cut off at the
+          bottom of the image
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Category" and "Count" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend correctly disabled for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct column/bar chart for categorical counts
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Auto-counts raw categorical data, sorts by descending count
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range (0-140) covering all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across 6 categories demonstrating the counting
+          functionality well. Could have more categories for richer demonstration.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product category purchases is a realistic e-commerce scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 500 samples with counts ranging 55-130 is reasonable, though probabilities
+          could create more dramatic variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png correctly, but image dimensions are 4800x2561 instead
+          of specified 4800x2700
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnSeries, dataLabels, borderRadius styling. Could leverage
+          more Highcharts features like animations or tooltips in HTML output.
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/letsplot.yaml b/plots/bar-categorical/metadata/letsplot.yaml
index fb6c7d812e..ee6fd7a84c 100644
--- a/plots/bar-categorical/metadata/letsplot.yaml
+++ b/plots/bar-categorical/metadata/letsplot.yaml
@@ -23,3 +23,171 @@ review:
   - Does not leverage lets-plot interactive features (tooltips, hover effects) that
     distinguish it from plotnine
   - Data context is generic; could use a more specific real-world scenario
+  image_description: 'The plot displays a categorical bar chart showing fruit type
+    frequency counts. Five vertical bars represent different fruits: Bananas (~40),
+    Mangoes (~31), Grapes (~33), Oranges (~40), and Apples (~56). The bars are rendered
+    in a muted blue color (#306998) with darker borders. The title "bar-categorical
+    · letsplot · pyplots.ai" appears at the top left in bold. The x-axis is labeled
+    "Fruit Type" with category names below each bar, and the y-axis is labeled "Count"
+    with tick marks from 0 to 55. The plot uses a minimal theme with horizontal grid
+    lines only, and the overall layout is clean with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large, axis labels and tick text are clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good width and clear visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Fruit Type", "Count") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Horizontal-only grid is appropriate for a bar chart and no legend
+          is needed; minor grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical count bar chart using geom_bar()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis (computed automatically)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic count computation from raw categorical data
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of counts (0-55)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series bar chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "bar-categorical · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation in counts across categories, demonstrates the counting
+          functionality well
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Fruit popularity is plausible but generic; could be more specific
+          (e.g., store inventory, survey results)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 200 samples with realistic probability weights producing sensible
+          counts
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html which is correct for letsplot
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_bar() and theme customization,
+          but does not leverage lets-plot specific features like tooltips or interactivity
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/matplotlib.yaml b/plots/bar-categorical/metadata/matplotlib.yaml
index 7a7790595a..5b188884e7 100644
--- a/plots/bar-categorical/metadata/matplotlib.yaml
+++ b/plots/bar-categorical/metadata/matplotlib.yaml
@@ -23,3 +23,172 @@ review:
   - Could use more distinctive matplotlib features (e.g., custom hatching, gradient
     fills, or annotation arrows)
   - Y-axis label "Count (Frequency)" is slightly redundant - could be simplified
+  image_description: 'The plot displays a vertical bar chart with 5 bars representing
+    product categories (Product A through E) on the x-axis. The bars are colored in
+    Python Blue (#306998) with a darker edge color. Each bar has a bold count label
+    above it showing the frequency values: Product A (154), Product B (118), Product
+    C (97), Product D (76), and Product E (55). The bars are sorted in descending
+    order by count. The y-axis shows "Count (Frequency)" ranging from 0 to 160, and
+    the x-axis is labeled "Product Category". The title follows the correct format:
+    "bar-categorical · matplotlib · pyplots.ai". A subtle horizontal dashed grid is
+    visible on the y-axis. The top and right spines are removed for a cleaner appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, value labels
+          at 18pt bold - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, bars well-spaced, labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized, good width relative to spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue), no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but y-axis could have units (e.g., "Count (n)"
+          or just "Count")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3) and no legend is needed for this single-series
+          chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart for categorical count data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Auto-counts raw categorical data, sorted descending as noted in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to above max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across 5 categories, good distribution
+          variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product preference survey is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 survey responses with sensible probability distribution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses standard matplotlib bar chart with Axes methods. Added value
+          labels and spine removal are nice touches, but could leverage more matplotlib-specific
+          features like custom tick formatting or annotations.
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/plotly.yaml b/plots/bar-categorical/metadata/plotly.yaml
index 766868d8b8..8417bfd69b 100644
--- a/plots/bar-categorical/metadata/plotly.yaml
+++ b/plots/bar-categorical/metadata/plotly.yaml
@@ -23,3 +23,169 @@ review:
     interactivity
   - Grid lines are very subtle (alpha 0.1) - could be slightly more visible for better
     readability
+  image_description: 'The plot displays a vertical bar chart with 6 blue bars representing
+    fruit category frequencies. The title "bar-categorical · plotly · pyplots.ai"
+    is centered at the top. The x-axis is labeled "Fruit Category" showing categories:
+    Apple (130), Banana (93), Orange (86), Grape (78), Mango (58), and Strawberry
+    (55). The y-axis is labeled "Count (Frequency)" ranging from 0 to 120+. Each bar
+    has its count value displayed above it. The bars are a solid blue color (#306998)
+    with darker blue borders. The background is white with subtle horizontal grid
+    lines. The layout is clean with good spacing between bars.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars clearly visible with good sizing and spacing (bargap=0.3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no color differentiation issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, slight excess margin on
+          right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Fruit Category" and "Count (Frequency)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend is needed, but the grid is too subtle (alpha 0.1) and could
+          be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart type for categorical counting
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic counting from raw data, sorted by count descending
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series bar chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "bar-categorical · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying frequencies well; could show more dramatic differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Fruit preference survey is a neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 500 samples reasonable, though frequencies are relatively close together
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Bar correctly but doesn't leverage Plotly's interactive features
+          like hover templates or animations
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/plotnine.yaml b/plots/bar-categorical/metadata/plotnine.yaml
index 23f799f00e..762d7dca09 100644
--- a/plots/bar-categorical/metadata/plotnine.yaml
+++ b/plots/bar-categorical/metadata/plotnine.yaml
@@ -25,3 +25,176 @@ review:
   - Could use scale_fill_brewer() with a diverging palette instead of manual colors
     for better colorblind accessibility
   - Missing count annotations on bars which would enhance readability
+  image_description: 'The plot displays a vertical bar chart showing customer satisfaction
+    survey results. Five categories are shown on the x-axis: "Excellent", "Good",
+    "Average", "Poor", and "Very Poor" (in logical order from best to worst). The
+    y-axis shows "Number of Responses" ranging from 0 to approximately 65. The bars
+    use a gradient color scheme from blue tones (Excellent=#306998, Good=#4A90C2)
+    through yellow (Average=#FFD43B) to orange-brown tones (Poor=#E8A838, Very Poor=#CC6633).
+    The "Good" category has the highest count (~66), followed by "Excellent" (~55),
+    "Average" (~36), "Poor" (~27), and "Very Poor" (~15). The title correctly shows
+    "bar-categorical · plotnine · pyplots.ai". The plot has a clean minimal theme
+    with no legend (appropriately hidden since colors match x-axis labels), subtle
+    horizontal grid lines, and well-proportioned layout filling the canvas appropriately.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate width (0.7) and alpha (0.9)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good diverging color scheme from blue to orange, but the yellow "Average"
+          bar is slightly low contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 canvas, plot fills space appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (though "Number of Responses" is
+          clear enough for count data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle, legend is correctly hidden, and the
+          major x-grid is removed, which is good
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical bar chart with automatic counting via geom_bar()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, counts on Y (computed automatically)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic count computation, ordered categories displayed
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden as colors are redundant with x-axis
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying counts across categories with realistic distribution,
+          but could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product satisfaction survey is a perfect, neutral real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 200 responses is reasonable for a survey, though counts are slightly
+          high for a typical survey sample
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) properly set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_bar for automatic counting,
+          pd.Categorical for ordering, but could leverage more plotnine features like
+          stat_count annotations or scale_fill_brewer
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/pygal.yaml b/plots/bar-categorical/metadata/pygal.yaml
index a35d5d6504..605edc76aa 100644
--- a/plots/bar-categorical/metadata/pygal.yaml
+++ b/plots/bar-categorical/metadata/pygal.yaml
@@ -22,3 +22,175 @@ review:
   - Could use more distinctive pygal features (e.g., custom tooltips with percentage,
     value_formatter showing both count and percentage)
   - Color variety could be added to distinguish high vs low frequency categories
+  image_description: 'The plot displays a vertical bar chart showing product category
+    frequencies. Six blue bars (#306998) represent different electronics categories:
+    Smartphone (142), Laptop (130), Tablet (74), Smartwatch (62), Desktop (48), and
+    Headphones (44). The bars are sorted in descending order by count. Each bar has
+    its count value displayed on top. The title "bar-categorical · pygal · pyplots.ai"
+    appears at the top. The x-axis is labeled "Product Category" and y-axis is labeled
+    "Count (Frequency)". Horizontal grid lines (dotted, subtle) help read values.
+    White background with clean layout.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and value labels all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; category labels well-spaced, values above bars
+          don't collide
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with good spacing between
+          them
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue), no color differentiation needed for this
+          single-series chart
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills appropriate area; slight whitespace
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels: "Product Category" and "Count
+          (Frequency)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle, and the legend is hidden
+          (show_legend=False), which is correct for a single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart for categorical count data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatically counts raw categorical data, sorted by count descending
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to beyond max count (142)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden for single-series data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across categories demonstrating the count
+          concept; could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Electronics product categories is a neutral, realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 500 samples with realistic probability weights; counts are reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses pygal's Style customization and print_values feature, but could
+          leverage more pygal-specific features like tooltips or animations in HTML
+          output
+  verdict: APPROVED
diff --git a/plots/bar-categorical/metadata/seaborn.yaml b/plots/bar-categorical/metadata/seaborn.yaml
index 4d62407d86..8812824b4c 100644
--- a/plots/bar-categorical/metadata/seaborn.yaml
+++ b/plots/bar-categorical/metadata/seaborn.yaml
@@ -24,3 +24,172 @@ review:
     seaborn styling
   - The order parameter manually specifies the order rather than computing it from
     the data
+  image_description: 'The plot displays a vertical bar chart showing programming language
+    preferences from survey responses. Seven bars represent different languages: Python
+    (dark blue, 140 responses), JavaScript (golden yellow, 101), Java (steel blue,
+    86), C++ (teal, 53), Go (olive/teal, 47), Rust (salmon/peach, 37), and TypeScript
+    (medium blue, 36). Each bar has its count displayed above it. The title reads
+    "bar-categorical · seaborn · pyplots.ai" at the top. X-axis is labeled "Programming
+    Language" and Y-axis "Number of Responses". A subtle dashed grid is present on
+    the y-axis. The top and right spines are removed for a cleaner appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels and bar annotations clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Custom palette with distinct colors; mostly accessible but the two
+          blue tones (Python/TypeScript) are somewhat similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Programming Language" and "Number of Responses"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3); no legend is needed here (colors are
+          self-explanatory via the x-axis)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical count bar chart using countplot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic counting, categories displayed, ordered by frequency (descending)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories and counts fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; bar labels serve the purpose
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-categorical · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation in counts from high (140) to low (36), demonstrating
+          frequency distribution well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language survey is a neutral, relatable scenario for
+          developers
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 total responses with realistic weighted distribution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.countplot which is a seaborn-specific function for automatic
+          counting, but the implementation could have leveraged more seaborn styling
+          features like set_theme() or statistical annotations
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/altair.yaml b/plots/bar-diverging/metadata/altair.yaml
index bfcb59e7f7..e06d09f1e4 100644
--- a/plots/bar-diverging/metadata/altair.yaml
+++ b/plots/bar-diverging/metadata/altair.yaml
@@ -24,3 +24,169 @@ review:
     closer to the chart or at top
   - Realistic context score slightly reduced - while customer satisfaction is valid,
     the exact score values feel somewhat arbitrary
+  image_description: 'The plot displays a horizontal diverging bar chart showing "Net
+    Satisfaction Score" for 12 company departments. Bars extend from a central zero
+    baseline - positive values (blue/Python blue #306998) extend to the right, negative
+    values (yellow/Python yellow #FFD43B) extend to the left. The chart is sorted
+    by satisfaction score from lowest (Logistics at -45) at the top to highest (Customer
+    Service at +42) at the bottom. A dark vertical line marks the zero baseline. The
+    x-axis ranges from -60 to 60 with label "Net Satisfaction Score". Department names
+    appear on the y-axis. A legend in the bottom-right corner shows "Sentiment" with
+    Positive and Negative indicators. The title "bar-diverging · altair · pyplots.ai"
+    appears centered at the top. Grid lines are subtle with dashed styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, department labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with cornerRadius for polish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe (not red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight margin imbalance on left side with longer
+          department names
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label but no units (score is unitless, so
+          acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.3 and dashed style; legend placed bottom-right
+          but could be better integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending in opposite directions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category on Y-axis, value on X-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Contrasting colors, horizontal orientation, zero baseline, sorted
+          bars - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Scale domain [-60, 60] shows all data with headroom
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Positive/Negative sentiment
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "bar-diverging · altair · pyplots.ai" used
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values with good distribution across
+          the range
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is plausible; scores ranging -45 to
+          +42 are reasonable for net satisfaction
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for a satisfaction score context
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no randomness), but could benefit from an
+          explicit comment noting this
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/bokeh.yaml b/plots/bar-diverging/metadata/bokeh.yaml
index 9c46e369ba..2697f7e1dc 100644
--- a/plots/bar-diverging/metadata/bokeh.yaml
+++ b/plots/bar-diverging/metadata/bokeh.yaml
@@ -27,3 +27,174 @@ review:
     be consistent with style guide)
   - Could leverage Bokeh interactive features like HoverTool to show exact values
     on hover
+  image_description: The plot displays a horizontal diverging bar chart showing customer
+    satisfaction survey results (Net Promoter Score style) across 10 categories. Blue
+    bars (#306998) extend rightward from zero for positive values, while yellow/gold
+    bars (#FFD43B) extend leftward for negative values. Categories are sorted by value
+    from lowest (Tech Support at -35) to highest (Return Policy at +52). The title
+    "bar-diverging · bokeh · pyplots.ai" appears at the top left. X-axis shows "Net
+    Satisfaction Score" ranging from -60 to 60+, Y-axis shows "Category" with all
+    10 category labels clearly visible. A subtle vertical baseline at zero separates
+    positive from negative values. The plot uses the full canvas width effectively
+    with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, horizontal orientation prevents label collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good height (0.7), alpha at 0.9 provides
+          solid visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe, though contrast could be slightly
+          better
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (score is unitless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend explaining color meaning
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: contrasting colors, horizontal orientation,
+          zero baseline, sorted bars'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-range (-60, 70) shows all data with padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed as colors are self-explanatory with positive/negative
+          context
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-diverging · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values with good variety (-35 to
+          +52)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Customer satisfaction NPS scenario is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in -100 to +100 NPS range are realistic
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed, though data is deterministic (hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Bokeh features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Span for zero line, hbar method, but could
+          leverage HoverTool or other interactive features
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/highcharts.yaml b/plots/bar-diverging/metadata/highcharts.yaml
index 72463669d7..169f33140c 100644
--- a/plots/bar-diverging/metadata/highcharts.yaml
+++ b/plots/bar-diverging/metadata/highcharts.yaml
@@ -22,3 +22,171 @@ review:
   weaknesses:
   - Y-axis title lacks units (could be "Net Satisfaction Score (%)")
   - Bars could be slightly thicker (pointWidth) to better fill the vertical space
+  image_description: 'The plot displays a horizontal diverging bar chart showing "Department
+    Net Satisfaction Scores". Ten department categories are listed on the y-axis (Customer
+    Service, IT Support, Sales, Marketing, Finance, Operations, HR, R&D, Legal, Logistics).
+    Bars extend from a central zero baseline - positive values (blue/teal color #306998)
+    extend to the right, negative values (yellow/gold color #FFD43B) extend to the
+    left. A clear vertical black line marks the zero baseline. Each bar has a data
+    label showing its value. The title follows the correct format "bar-diverging ·
+    highcharts · pyplots.ai" with a subtitle "Department Net Satisfaction Scores".
+    The x-axis ranges from -50 to 60 with clear tick labels.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, labels, and tick marks are all clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good visual weight
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but bars appear somewhat thin relative to the
+          large canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis title "Net Satisfaction Score" is descriptive but lacks units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend disabled (appropriate for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending both directions from
+          zero
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has contrasting colors, horizontal orientation, zero baseline indicator,
+          data sorted by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate min/max (-50 to 60)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately for single-series chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bar-diverging · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values with good spread
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is plausible, though scores could be
+          more varied
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Net satisfaction scores from -38 to +45 are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded), so no random seed is needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts plotLines for zero baseline, data labels, but could
+          leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/letsplot.yaml b/plots/bar-diverging/metadata/letsplot.yaml
index 212164d1dd..0349fb8088 100644
--- a/plots/bar-diverging/metadata/letsplot.yaml
+++ b/plots/bar-diverging/metadata/letsplot.yaml
@@ -25,3 +25,178 @@ review:
   - Could add interactive tooltips to leverage lets-plot interactive capabilities
     in the HTML output
   - Data distribution slightly unbalanced (only 4 negative vs 8 positive categories)
+  image_description: 'The plot displays a horizontal diverging bar chart showing Net
+    Promoter Scores for 12 customer satisfaction categories. Bars extend left (red,
+    #DC2626) for negative scores and right (blue, #306998) for positive scores from
+    a clear vertical baseline at zero. Categories are sorted by score from lowest
+    (Mobile App at -35) to highest (Product Quality at +72). The title "bar-diverging
+    · letsplot · pyplots.ai" appears at top center in bold. X-axis is labeled "Net
+    Promoter Score" and Y-axis is labeled "Category". A legend on the right shows
+    "Sentiment" with Negative (red) and Positive (blue) indicators. The layout is
+    clean with a minimal theme, subtle grid lines on the x-axis only, and good use
+    of whitespace.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels and tick marks are all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; category labels are well-spaced along the y-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate width and alpha for clear visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Red/blue is colorblind-safe (distinguishable even with red-green
+          colorblindness)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but "Net Promoter Score" could include units
+          or range indicator
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (only on x-axis), legend is well-placed and doesn't
+          overlap data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending in opposite directions
+          from zero
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has contrasting colors, horizontal orientation, zero baseline indicator,
+          sorted bars
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, range from -40 to +70 shown appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Negative/Positive sentiment
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "bar-diverging · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values with good distribution (4
+          negative, 8 positive)
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Customer satisfaction NPS data is plausible, though slightly more
+          negative categories could improve balance
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: NPS scores from -35 to +72 are realistic for the -100 to +100 scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but lacks a comment clarifying
+          that no random seed is needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_bar, geom_vline, scale_fill_manual,
+          and theme customization. Could leverage more lets-plot specific features
+          like tooltips for interactivity.
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/matplotlib.yaml b/plots/bar-diverging/metadata/matplotlib.yaml
index 212efb0551..7ea4a26fda 100644
--- a/plots/bar-diverging/metadata/matplotlib.yaml
+++ b/plots/bar-diverging/metadata/matplotlib.yaml
@@ -23,3 +23,171 @@ review:
   weaknesses:
   - X-axis label could include context (e.g., Net Satisfaction Score (%)) to clarify
     the measurement scale
+  image_description: 'The plot displays a horizontal diverging bar chart showing product
+    satisfaction survey scores for 12 categories. Blue bars (#306998) extend rightward
+    from the zero baseline for positive satisfaction scores (satisfied), while yellow
+    bars (#FFD43B) extend leftward for negative scores (dissatisfied). Categories
+    are sorted by value from lowest (Mobile App: -52) at the bottom to highest (Brand
+    Trust: +85) at the top. Each bar has a bold value label at its end showing the
+    score with +/- sign. A clear vertical black line marks the zero baseline. The
+    title "bar-diverging · matplotlib · pyplots.ai" appears at the top. A legend in
+    the lower right explains "Positive (Satisfied)" and "Negative (Dissatisfied)".
+    Subtle dashed gridlines appear on the x-axis. The x-axis is labeled "Net Satisfaction
+    Score" with range from -100 to +100.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar height 0.7 optimal for 12 categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow colorblind-safe but not ideal (blue/orange would be better)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Net Satisfaction Score" descriptive but unitless'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid (alpha 0.3), well-placed legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: contrasting colors, horizontal orientation,
+          zero baseline indicator, sorted by value'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels correctly describe positive/negative
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: bar-diverging · matplotlib · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive AND negative values with good variation in magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product satisfaction survey with plausible categories (Customer Support,
+          Pricing, Mobile App, etc.)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in -100 to +100 range as specified in spec
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flow: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.pyplot, numpy, Patch)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No outdated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of barh, axvline, spines manipulation, Patch for custom
+          legend
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/plotly.yaml b/plots/bar-diverging/metadata/plotly.yaml
index d450c142cc..a86df9d0fb 100644
--- a/plots/bar-diverging/metadata/plotly.yaml
+++ b/plots/bar-diverging/metadata/plotly.yaml
@@ -23,3 +23,180 @@ review:
   - Missing legend to explain color coding (blue=satisfied, yellow=dissatisfied)
   - X-axis label could include units like Satisfaction Score (NPS-style -100 to +100)
   - Data values could include more extreme scores closer to ±100 for fuller demonstration
+  image_description: The plot displays a horizontal diverging bar chart showing customer
+    satisfaction survey results across 10 departments. Blue bars (#306998) extend
+    rightward from the zero baseline for positive scores, while yellow/gold bars (#FFD43B)
+    extend leftward for negative scores. The data is sorted by value from highest
+    (Customer Support at +72) at the top to lowest (Return Policy at -42) at the bottom.
+    Each bar displays its value with +/- sign positioned outside the bar. A clear
+    dark vertical line marks the zero baseline. The x-axis spans -100 to +100 with
+    tick marks at 25-point intervals. Title reads "Customer Satisfaction Survey ·
+    bar-diverging · plotly · pyplots.ai" centered at top. The layout uses a clean
+    white template with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt, value labels
+          at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good spacing (bargap=0.3), white borders provide
+          separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe, though yellow on white could have
+          slightly better contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score" is descriptive but lacks units; "Department"
+          is appropriate'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (good), but no legend explaining color meaning (blue=positive,
+          yellow=negative)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending in opposite directions
+          from zero
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has contrasting colors, horizontal orientation, zero baseline, sorted
+          by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range [-100, 100] shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this chart type, colors are self-explanatory with +/- values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Customer Satisfaction Survey · bar-diverging
+          · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive (6 categories) and negative (4 categories) values
+          with good range of magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is a perfect real-world use case for
+          diverging bars
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Values are plausible but could be more varied (no extreme values
+          near ±100)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → sort → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic (hardcoded values) - this
+          is acceptable
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only `plotly.graph_objects` imported, which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses go.Bar with orientation, add_vline, proper layout configuration,
+          write_html for interactivity
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/plotnine.yaml b/plots/bar-diverging/metadata/plotnine.yaml
index bc1b82ff51..a504f6e121 100644
--- a/plots/bar-diverging/metadata/plotnine.yaml
+++ b/plots/bar-diverging/metadata/plotnine.yaml
@@ -24,3 +24,183 @@ review:
   - Legend placement could be closer to the plot area to reduce whitespace on the
     right
   - Could use a diverging Brewer palette (e.g., RdBu) for better colorblind accessibility
+  image_description: The plot displays a horizontal diverging bar chart showing customer
+    satisfaction survey results across 12 product categories. Blue bars extend to
+    the right for positive net satisfaction scores (Mobile App at ~72%, Customer Service
+    at ~45%, Website at ~38%, Delivery Speed at ~25%, Product Quality at ~18%, Pricing
+    at ~8%), while coral/red bars extend to the left for negative scores (Return Policy
+    at ~-5%, Packaging at ~-12%, Email Support at ~-22%, Chat Support at ~-35%, Documentation
+    at ~-48%, Warranty at ~-62%). A clear vertical baseline at zero separates positive
+    from negative values. The title "bar-diverging · plotnine · pyplots.ai" is centered
+    at the top. The Y-axis is labeled "Product Category" and the X-axis is labeled
+    "Net Satisfaction Score (%)". A legend on the right shows "Sentiment" with Negative
+    (coral) and Positive (blue) indicators. The bars are sorted by value from highest
+    (Mobile App) to lowest (Warranty), and the layout uses a clean minimal theme with
+    subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, horizontal orientation prevents label
+          crowding
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar width of 0.7 is well-suited for 12 categories, all bars clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/coral contrast is good, but this is not an ideal colorblind-safe
+          palette (blue-orange would be slightly better)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with good margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Net Satisfaction Score (%)" includes units, "Product Category"
+          is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3, but legend could be positioned better
+          (closer to the plot)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending from central baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Zero baseline visible, contrasting colors, horizontal orientation,
+          sorted by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range from -62 to +72 displayed correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Positive" and "Negative" labels correctly match bar colors'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bar-diverging · plotnine · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values with good spread, 6 positive
+          and 6 negative categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is plausible, though some category pairings
+          are slightly arbitrary
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values range from -62 to +72, realistic for net satisfaction scores
+          (-100 to +100 range)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), but no seed statement for future
+          proofing
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_bar, coord_flip, scale_fill_manual,
+          theme customization - solid plotnine usage but nothing exceptional like
+          facets or statistical layers
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/pygal.yaml b/plots/bar-diverging/metadata/pygal.yaml
index ee661cae05..1e21fac249 100644
--- a/plots/bar-diverging/metadata/pygal.yaml
+++ b/plots/bar-diverging/metadata/pygal.yaml
@@ -24,3 +24,183 @@ review:
   - Font sizes deviate from library template defaults though they work well for the
     canvas size
   - Legend could be positioned closer to the chart area to reduce empty space
+  image_description: The plot displays a horizontal diverging bar chart showing customer
+    satisfaction survey results across 10 departments. Bars extend left (coral/salmon
+    color, -12 to -45) for dissatisfied and right (blue, +31 to +72) for satisfied
+    scores. The chart uses a white background with clear category labels on the left
+    Y-axis (Customer Support, Response Time, Website Experience, etc.), numerical
+    value labels centered within each bar with +/- signs, and an X-axis showing Satisfaction
+    Score ranging from -100 to +100. The title "Customer Satisfaction Survey · bar-diverging
+    · pygal · pyplots.ai" appears at the top. A legend at the bottom shows "Satisfied"
+    (blue) and "Dissatisfied" (coral). Data is sorted by value from highest (Customer
+    Support +72) to lowest (Return Process -45).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and values are clearly readable. Font sizes are appropriate
+          for the 4800×2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; horizontal orientation works perfectly with
+          category labels.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, values displayed in center are clearly visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) vs coral (#E07A5F) provides excellent colorblind-safe
+          contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though some empty area on the right side.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Satisfaction Score" but no units specified (though implicit
+          -100 to +100 scale).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: X-guides are visible and helpful; legend at bottom is clear but could
+          be positioned closer to the chart.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending in opposite directions
+          from zero.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Zero baseline clear, contrasting colors for pos/neg, horizontal orientation,
+          sorted by value.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Range set to -100 to +100, all data visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Satisfied/Dissatisfied.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: `{title} · bar-diverging · pygal · pyplots.ai`.'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent mix of positive and negative values, varying magnitudes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is a perfect real-world use case.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores from -45 to +72 are realistic for a satisfaction scale.
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → sort → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic (hardcoded), so this is
+          actually fine. Full points.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is hardcoded/deterministic.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of pygal's HorizontalBar, custom Style, print_values, value_formatter
+          with signed numbers, and dual output (PNG + HTML). However, could leverage
+          more interactive features.
+  verdict: APPROVED
diff --git a/plots/bar-diverging/metadata/seaborn.yaml b/plots/bar-diverging/metadata/seaborn.yaml
index 4b8dfe1f06..e9d88843b3 100644
--- a/plots/bar-diverging/metadata/seaborn.yaml
+++ b/plots/bar-diverging/metadata/seaborn.yaml
@@ -26,3 +26,180 @@ review:
     left instead
   - Data scenario is somewhat generic - could use more specific real-world context
     (e.g., specific industry or year)
+  image_description: The plot shows a horizontal diverging bar chart displaying Net
+    Promoter Scores by Department. The chart features 12 departments on the y-axis
+    (Operations at top, Sales at bottom) sorted by NPS score from lowest to highest.
+    Bars extend left (negative/yellow) or right (positive/blue) from a central black
+    vertical baseline at zero. Blue bars represent "Positive (Promoters)" ranging
+    from +8 to +62, while yellow/gold bars represent "Negative (Detractors)" ranging
+    from -8 to -45. Each bar has a value label showing the score (e.g., +55, -32).
+    The title is "bar-diverging · seaborn · pyplots.ai" in bold at the top. X-axis
+    shows "Net Promoter Score" from -60 to 80, Y-axis shows "Department". A legend
+    in the lower right explains the color coding. Grid lines are subtle dashed vertical
+    lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt, value labels
+          14pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, department labels well spaced, value labels
+          positioned outside bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized for 12 categories, clear distinction
+          between bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Net Promoter Score" and "Department" are descriptive but lack units
+          (NPS is unitless, so acceptable)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), legend well placed but slightly
+          overlaps with Sales bar area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct diverging bar chart with bars extending in opposite directions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis, correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: contrasting colors, horizontal orientation,
+          zero baseline, sorted bars'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range (-70 to 80), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Positive (Promoters) and Negative (Detractors)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "bar-diverging · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values, good range of magnitudes,
+          variety of departments
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: NPS by department is plausible but somewhat generic; real companies
+          rarely have such wide NPS variation between departments
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: NPS scores ranging from -45 to +62 are realistic (NPS ranges from
+          -100 to +100)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but numpy seed not strictly necessary since data
+          is hardcoded
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's barplot correctly with hue parameter for modern API,
+          but doesn't leverage seaborn-specific features like statistical aggregation
+          or built-in themes
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/altair.yaml b/plots/bar-error/metadata/altair.yaml
index 73b8bff34d..e21ffe5336 100644
--- a/plots/bar-error/metadata/altair.yaml
+++ b/plots/bar-error/metadata/altair.yaml
@@ -23,3 +23,170 @@ review:
   - No tooltips for interactivity - Altair strength is interactive visualization
   - Only symmetric error bars shown; asymmetric example would demonstrate fuller feature
     coverage
+  image_description: 'The plot displays a bar chart with 5 blue bars (#306998) representing
+    treatment groups: Control, Drug A, Drug B, Drug C, and Combination. The x-axis
+    is labeled "Treatment Group" and the y-axis "Response Rate (%)" with a scale from
+    0-100. Each bar features black error bars with clearly visible caps at both top
+    and bottom ends. The title "bar-error · altair · pyplots.ai" is centered at the
+    top. An annotation "Error bars: ±1 SD" is positioned in the upper right corner.
+    The background has subtle gray gridlines (alpha 0.3), and all text elements are
+    clearly readable with appropriate font sizes.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized (size=60), error bars clearly visible with 3px stroke
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Response Rate (%)" has units, but no units needed for categorical
+          x-axis'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (0.3 alpha), but no legend (annotation used instead,
+          which is acceptable)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with caps present, annotation explaining error bars included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Annotation "Error bars: ±1 SD" clearly explains error representation'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bar-error · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in response rates and error magnitudes, but all symmetric
+          errors (no asymmetric example)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Scientific drug treatment comparison is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response rates 45-82% with SD 8-15% are realistic for treatment studies
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of layered chart composition (bars + error_bars + caps +
+          annotation), but could add interactivity with tooltips
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/bokeh.yaml b/plots/bar-error/metadata/bokeh.yaml
index bdae46fbbb..70f3823022 100644
--- a/plots/bar-error/metadata/bokeh.yaml
+++ b/plots/bar-error/metadata/bokeh.yaml
@@ -23,3 +23,173 @@ review:
   - Error bars annotation appears partially cut off at right edge
   - Only symmetric error bars shown; spec mentions asymmetric errors as a feature
   - Grid alpha could be more subtle
+  image_description: 'The plot displays a vertical bar chart with 5 product categories
+    (Electronics, Clothing, Home & Garden, Sports, Books) on the x-axis and Quarterly
+    Revenue in millions on the y-axis. Bars are rendered in a medium blue color (#306998)
+    with dark outlines. Each bar has black error bars with visible horizontal caps
+    (TeeHead style) at both ends, representing ±1 standard deviation. The title "bar-error
+    · bokeh · pyplots.ai" appears in the top-left. An annotation "Error bars: ±1 SD"
+    is visible in the top-right corner. The background is light gray (#fafafa) with
+    subtle dashed horizontal grid lines. Y-axis starts at 0 and extends to ~108. All
+    text is legible and well-sized.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24-26pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized, error bars with line_width=5 and TeeHead size=40
+          are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but right margin slightly large due to annotation
+          positioning
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Category" and "Quarterly Revenue ($ millions)" - descriptive
+          with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (not strictly needed), but annotation for error
+          bar meaning is cut off at the edge
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, errors correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars have visible caps, annotation explains what they represent
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data including error bar extensions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotation correctly states "±1 SD"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-error · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in bar heights and different error magnitudes, but
+          all errors are symmetric (spec mentions asymmetric errors as a feature)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product category is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values ($35-85M) and standard deviations ($4-9M) are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Whisker with TeeHead for error bars, Label
+          for annotation - good Bokeh idioms but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/highcharts.yaml b/plots/bar-error/metadata/highcharts.yaml
index e4dd9af6c0..d26c9ee282 100644
--- a/plots/bar-error/metadata/highcharts.yaml
+++ b/plots/bar-error/metadata/highcharts.yaml
@@ -25,3 +25,180 @@ review:
     not visible in the rendered image
   - Could demonstrate asymmetric error bars to fully showcase the capability mentioned
     in spec
+  image_description: 'The plot displays a vertical bar chart with 5 blue columns (#306998)
+    representing treatment groups: Control, Treatment A, Treatment B, Treatment C,
+    and Treatment D. Each bar has black error bars with visible caps (whiskers) extending
+    above and below the bar tops. The title "bar-error · highcharts · pyplots.ai"
+    appears at the top in bold, with a subtitle "Error bars represent ±1 Standard
+    Deviation" below it. The y-axis is labeled "Response Value (units)" ranging from
+    0 to ~80, and the x-axis is labeled "Treatment Group". A legend showing "Mean
+    Value" appears in the top-right corner. The grid uses subtle dashed horizontal
+    lines. Treatment C shows the highest value (~68), while Control shows the lowest
+    (~42).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable. Font
+          sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, error bars have visible caps/whiskers, good
+          contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue (#306998) with dark error bars; no red-green
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; minor issue with legend only showing "Mean
+          Value" (missing error bar legend entry in visible legend)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Value (units)" and "Treatment
+          Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines (good), but legend is incomplete
+          - only shows "Mean Value", the "Error (±1 SD)" entry defined in code is
+          not visible in the screenshot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with visible caps, subtitle explaining error bar meaning
+          (±1 SD)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0 and shows all data including error bar ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-error · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in bar heights and different error magnitudes. Missing
+          asymmetric error bars which spec mentions as optional
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Treatment comparison with control group is a plausible scientific
+          experiment scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (42-68 range) and errors (4.8-8.3) are realistic for experimental
+          data
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random), but no seed comment. The data
+          is hardcoded which is reproducible.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses Highcharts errorbar series type, highcharts-more.js module,
+          proper whisker configuration, interactive HTML output
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/letsplot.yaml b/plots/bar-error/metadata/letsplot.yaml
index 95f4aaf6b5..4166e5d8dc 100644
--- a/plots/bar-error/metadata/letsplot.yaml
+++ b/plots/bar-error/metadata/letsplot.yaml
@@ -23,3 +23,176 @@ review:
   - The alternating blue/yellow color scheme is decorative rather than informative;
     colors could encode a meaningful variable
   - Grid could benefit from a subtle alpha setting
+  image_description: 'The plot displays a bar chart with 5 vertical bars representing
+    A/B test groups: Control, Variant A, Variant B, Variant C, and Variant D. The
+    bars alternate between Python blue (#306998) and yellow (#FFD43B) colors. Each
+    bar has clearly visible black error bars with horizontal caps at both ends, indicating
+    the 95% confidence intervals. The Y-axis is labeled "Conversion Rate (%)" and
+    ranges from 0 to 19. The X-axis is labeled "Test Group" with category names below
+    each bar. The title "bar-error · letsplot · pyplots.ai" appears at the top left.
+    A caption "Error bars show 95% CI" is positioned at the bottom right, explaining
+    what the error bars represent. The minimal theme provides a clean look with subtle
+    gridlines on the Y-axis only.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized, error bars are clearly visible with
+          proper caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-friendly, though the alternating pattern
+          doesn't convey meaning
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units ("Conversion Rate (%)",
+          "Test Group")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (show_legend=False), but also no semantic meaning
+          to the colors
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, error bars correctly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars have visible caps, caption explains error representation
+          (95% CI)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data including error bar extents
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for this use case, colors are decorative)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-error · letsplot · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows asymmetric error bars, variation in bar heights, but colors
+          don't encode meaningful data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: A/B test conversion rates is a plausible, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Conversion rates of 11-17% are realistic; error magnitudes (1-2%)
+          are reasonable for typical sample sizes
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_bar, geom_errorbar, theme_minimal), but
+          doesn't leverage lets-plot specific interactive features in the static output
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/matplotlib.yaml b/plots/bar-error/metadata/matplotlib.yaml
index ef19f411d4..d5dc0c879e 100644
--- a/plots/bar-error/metadata/matplotlib.yaml
+++ b/plots/bar-error/metadata/matplotlib.yaml
@@ -27,3 +27,173 @@ review:
     being more integrated with the plot
   - Basic library usage without leveraging advanced matplotlib features like bar_label()
     for showing values
+  image_description: 'The plot displays a bar chart with 5 blue vertical bars representing
+    A/B test groups (Control, Variant A, Variant B, Variant C, Variant D). Each bar
+    has dark blue error bars with visible caps extending above and below the bar tops.
+    The bars use a consistent blue color (#306998) with darker edge color (#1e4466).
+    The y-axis shows "Conversion Rate (%)" ranging from 0 to approximately 24, and
+    the x-axis shows "Test Group". The title follows the required format "bar-error
+    · matplotlib · pyplots.ai". A small annotation box in the bottom-right corner
+    states "Error bars: 95% CI". The grid is subtle with horizontal dashed lines at
+    alpha 0.3.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars and error bars are clearly visible with appropriate sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good proportions, plot fills canvas well, minor: could use slightly
+          more vertical space'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Conversion Rate (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but the annotation box placement
+          in bottom-right corner is not optimal (sits in empty space rather than near
+          legend position)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with visible caps, annotation explaining error bar meaning
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with appropriate headroom (0 to ~24)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotation accurately describes "95% CI"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-error · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varying bar heights and different error magnitudes, but all
+          errors are symmetric (spec mentions asymmetric errors as an option)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: A/B test conversion rates is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Conversion rates of 11-18% with CI widths of 1-3% are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ax.bar() and ax.errorbar() correctly, but these are basic matplotlib
+          features. Could have used more advanced features like bar_label() for value
+          annotations or customized error bar styling
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/plotly.yaml b/plots/bar-error/metadata/plotly.yaml
index 9a0d763faa..b237931d76 100644
--- a/plots/bar-error/metadata/plotly.yaml
+++ b/plots/bar-error/metadata/plotly.yaml
@@ -27,3 +27,178 @@ review:
     interaction
   - Error magnitude variation across groups is relatively uniform - more dramatic
     differences would better showcase the feature
+  image_description: 'The plot shows a bar chart with 5 blue bars (#306998) representing
+    treatment groups: Control, Treatment A, Treatment B, Treatment C, and Treatment
+    D. Each bar has dark vertical error bars with horizontal caps at the ends indicating
+    asymmetric standard deviations. The y-axis shows "Response Value (%)" ranging
+    from 0-90, and the x-axis shows "Treatment Group". The title reads "Lab Treatment
+    Results · bar-error · plotly · pyplots.ai" centered at the top. An annotation
+    in the upper right corner states "Error bars: ±1 SD (asymmetric)". The plot uses
+    a clean white background with subtle horizontal grid lines. Treatment C has the
+    highest response (~72%) with the largest error range, while Control has the lowest
+    (~45%).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars clearly visible, error bars with thick lines (3px) and wide
+          caps (12px)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Response Value (%)", X-axis descriptive "Treatment
+          Group"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), but no legend shown (showlegend=False)
+          - annotation explains error bars but a legend entry would be cleaner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, errors correctly applied
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars have caps, annotation explains error representation, asymmetric
+          errors demonstrated
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [0, 90] shows all data with headroom for error bars
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotation accurately describes "±1 SD (asymmetric)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "Lab Treatment Results · bar-error · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows asymmetric errors and variation across groups, but could show
+          more dramatic differences in error magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Lab treatment experiment is a perfect real-world scenario for error
+          bars
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response values 45-72% with errors 4-8% are realistic for treatment
+          studies
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → figure → layout → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Bar with error_y correctly, but doesn't leverage Plotly's
+          interactive features (hover templates, animations) or Express API for simpler
+          code
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/plotnine.yaml b/plots/bar-error/metadata/plotnine.yaml
index 05218c0aa7..70b7a25bfd 100644
--- a/plots/bar-error/metadata/plotnine.yaml
+++ b/plots/bar-error/metadata/plotnine.yaml
@@ -26,3 +26,176 @@ review:
     color mapping would be cleaner
   - Only symmetric error bars shown (spec mentions asymmetric may be needed for some
     cases)
+  image_description: The plot displays a bar chart with 6 survey categories on the
+    x-axis (Product Quality, Customer Service, Delivery Speed, Price Value, Website
+    UX, Return Policy) and satisfaction scores (1-5) on the y-axis. Bars alternate
+    between Python blue (#306998) and gold (#FFD43B) colors. Each bar has black error
+    bars with horizontal caps representing 95% confidence intervals. The title "bar-error
+    · plotnine · pyplots.ai" appears in bold at the top. Category labels are rotated
+    ~25 degrees for readability. A caption "Error bars represent 95% CI" appears in
+    italics at the bottom right. The layout uses a minimal theme with subtle horizontal
+    grid lines only.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick labels appropriately sized,
+          all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Rotated x-axis labels prevent overlap, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized, error bars clearly visible with caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe, but alternating colors don't convey
+          meaning
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "(1-5)", X-axis label "Survey Category" is descriptive
+          but generic
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but grid only on y-axis is good; however caption
+          is slightly cut off on right edge
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, error bars correctly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars have visible caps, caption explains error bars represent
+          95% CI
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this visualization
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-error · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varying bar heights and different error magnitudes, but all
+          symmetric errors (spec mentions asymmetric errors may be needed)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 1-5 satisfaction scale with values 3.5-4.5 and CI widths 0.2-0.5
+          are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but uses hardcoded values (acceptable, but
+          np.random.seed would show variety)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_col + geom_errorbar, pd.Categorical
+          for ordering, theme customization. Could use scale_fill_brewer for more
+          idiomatic palette usage.
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/pygal.yaml b/plots/bar-error/metadata/pygal.yaml
index b5a326c1e9..56f583a901 100644
--- a/plots/bar-error/metadata/pygal.yaml
+++ b/plots/bar-error/metadata/pygal.yaml
@@ -23,3 +23,175 @@ review:
   - Legend placement at bottom-left corner appears disconnected from the chart
   - Grid lines could be more subtle (currently dotted but still prominent)
   - Only symmetric error bars shown when spec mentions asymmetric as an option
+  image_description: The plot displays a bar chart with 5 blue bars representing treatment
+    groups (Control, Treatment A, Treatment B, Treatment C, Treatment D) on the x-axis.
+    The y-axis shows "Response Value (units)" ranging from 0 to 100. Each bar has
+    error bars extending vertically from the top, showing confidence intervals with
+    horizontal caps at the ends. The bars are a consistent blue color (#306998). The
+    title reads "bar-error · pygal · pyplots.ai" at the top. A legend at the bottom
+    left shows "Mean ± 1 SD" with a blue square indicator. The background is white
+    with subtle horizontal grid lines. Treatment B has the highest value (~78) and
+    Control has the lowest (~45).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels all clearly readable at full size
+          with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all category labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, error bars clearly visible with caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but legend placement at bottom-left is slightly
+          awkward, could be better integrated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Treatment Group"
+          and "Response Value (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is too prominent (dotted lines visible but not subtle enough),
+          legend is positioned awkwardly at bottom-left corner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars present with visible caps, legend explains error representation
+          (±1 SD)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range 0-100 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Mean ± 1 SD"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-error · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in means and different error magnitudes, but all
+          errors are symmetric (spec mentions asymmetric as option)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Scientific experiment comparing treatment groups is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 45-78 with SD of 8-15 are realistic for experimental data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart config → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct) but strict=True in zip is Python 3.10+
+          specific style
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's confidence interval feature with 'ci' dict, custom Style,
+          but could leverage more interactive/SVG features
+  verdict: APPROVED
diff --git a/plots/bar-error/metadata/seaborn.yaml b/plots/bar-error/metadata/seaborn.yaml
index 5b3488f9c4..5319287315 100644
--- a/plots/bar-error/metadata/seaborn.yaml
+++ b/plots/bar-error/metadata/seaborn.yaml
@@ -27,3 +27,172 @@ review:
     in the corner
   - Feature coverage could show more variation (e.g., one notably different group
     or wider CI range)
+  image_description: 'The plot displays a bar chart with 6 categories (Control, Variant
+    A through E) on the x-axis and Conversion Rate (%) on the y-axis ranging from
+    0 to ~7.5%. Bars alternate between dark blue (#306998) and golden yellow (#FFD43B)
+    colors. Each bar has black error bars with visible caps extending vertically to
+    show 95% confidence intervals. The title "bar-error · seaborn · pyplots.ai" is
+    prominently displayed at the top in bold. An annotation box in the upper right
+    corner explains "Error bars: 95% CI". A subtle horizontal dashed grid is visible
+    behind the bars. The layout is clean with good proportions and all text is clearly
+    legible.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis labels at 20pt, tick labels at 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars appropriately sized, error bars clearly visible with prominent
+          caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow alternating pattern is colorblind-safe (not red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, minor whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Conversion Rate (%)" and "Test Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but annotation placement in corner
+          could be closer to data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis, errors properly displayed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars have visible caps, annotation explains error bar meaning
+          (95% CI), asymmetric errors implemented
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data with headroom
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotation accurately describes error bars
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "bar-error · seaborn · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows multiple categories with varying conversion rates and asymmetric
+          CIs, but all positive/similar magnitude
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: A/B test conversion rates is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Conversion rates of 4-6% are realistic, error margins appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn's barplot with proper hue/palette API
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.barplot correctly but error bars added via matplotlib's
+          errorbar rather than seaborn's native capabilities
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/altair.yaml b/plots/bar-feature-importance/metadata/altair.yaml
index ba702ddf69..b59f3beee2 100644
--- a/plots/bar-feature-importance/metadata/altair.yaml
+++ b/plots/bar-feature-importance/metadata/altair.yaml
@@ -21,3 +21,175 @@ review:
   weaknesses:
   - Grid opacity at 0.3 could be reduced further (0.2) for subtler appearance
   - Unnecessary plot.html output alongside plot.png
+  image_description: 'The plot displays a horizontal bar chart showing feature importance
+    values for 15 features from a customer analytics model. Bars are colored using
+    a blue sequential gradient (light to dark) based on importance values. Features
+    are sorted from lowest importance (social_media_engagement at 0.005) at the top
+    to highest importance (customer_lifetime_value at 0.182) at the bottom. Each bar
+    has a black error bar showing standard deviation, and importance values are displayed
+    as text annotations (3 decimal places) to the right of each bar''s error bar.
+    The title "bar-feature-importance · altair · pyplots.ai" is positioned at the
+    top-left. Axis labels are clear: "Feature" on y-axis and "Importance Score" on
+    x-axis. The grid is subtle with light gray lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names are fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized, error bars visible, though smallest bars are
+          quite small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue sequential colorscheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, minor empty space on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Feature" and "Importance Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend (acceptable since no legend needed), but grid could be
+          slightly more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on Y-axis, importance on X-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has sorting, color gradient, error bars, and text annotations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this chart type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "{spec-id} · {library} · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 15 features with varying importances, good range from 0.005
+          to 0.182
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer analytics scenario with realistic feature names (customer_lifetime_value,
+          purchase_frequency, etc.)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Importances sum to ~1.0 as expected for tree-based models, though
+          std values could be slightly more varied
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct) but also saves plot.html (unnecessary
+          extra output)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding, layered chart composition, and
+          transform_calculate. Could leverage tooltips more prominently or add interactivity.
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/bokeh.yaml b/plots/bar-feature-importance/metadata/bokeh.yaml
index bf09330d5c..364842e3d0 100644
--- a/plots/bar-feature-importance/metadata/bokeh.yaml
+++ b/plots/bar-feature-importance/metadata/bokeh.yaml
@@ -22,3 +22,182 @@ review:
   weaknesses:
   - Missing HoverTool for interactive exploration in HTML output (Bokeh key strength)
   - Y-axis label could be added (Feature or Model Feature)
+  image_description: 'The plot displays a horizontal bar chart showing feature importances
+    from a machine learning classification model. There are 12 features displayed
+    on the y-axis (from bottom to top: Account Balance, Payment History, Home Ownership,
+    Num Inquiries, Education Level, Loan Amount, Num Accounts, Debt Ratio, Employment
+    Years, Age, Credit Score, Income). The bars extend horizontally with length proportional
+    to importance scores. A blue sequential color gradient is applied - darker blue
+    for higher importance values, lighter blue for lower values. Each bar has its
+    importance value labeled at the end (ranging from 0.015 to 0.185). The title "bar-feature-importance
+    · bokeh · pyplots.ai" appears at the top in blue. The x-axis is labeled "Importance
+    Score" with tick marks at 0, 0.05, 0.1, 0.15, and 0.2. The background is light
+    gray (#fafafa) with subtle dashed vertical grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and value annotations are clearly readable. Feature
+          names on y-axis are legible. Slightly smaller than optimal for some elements.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. All feature names and value labels
+          are clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good height (0.7), clearly distinguishable.
+          Color gradient effectively shows importance hierarchy.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues9 palette is colorblind-safe. Sequential blue gradient provides
+          good contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space. Slight issue: feature names appear slightly
+          close to edge on y-axis.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Importance Score" but no units (importance scores are
+          unitless, so acceptable). Y-axis has no label (features are self-explanatory).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle dashed grid with alpha 0.3 is good. No legend needed for this
+          plot type.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for feature importance.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance values on x-axis as specified.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has sorted bars (highest at top), color gradient, value annotations.
+          Missing optional error bars (std) mentioned in spec.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (0 to ~0.21) properly shows all data with 15% padding.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color gradient is self-explanatory.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-feature-importance · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 12 features with good variation in importance values. Could
+          show more dramatic range differences.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit scoring/loan approval model context is realistic and comprehensible.
+          Feature names are plausible for this domain.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values sum close to 1.0, realistic for tree-based model
+          feature importances.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → processing → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded), but no random seed comment. Minor
+          deduction.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (export_png, save, ColumnDataSource, LabelSet,
+          LinearColorMapper, Blues9, figure, CDN).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, LinearColorMapper, LabelSet - good Bokeh patterns.
+          Could leverage HoverTool for interactivity in HTML output.
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/highcharts.yaml b/plots/bar-feature-importance/metadata/highcharts.yaml
index ac332e97b8..e3aac9daf2 100644
--- a/plots/bar-feature-importance/metadata/highcharts.yaml
+++ b/plots/bar-feature-importance/metadata/highcharts.yaml
@@ -27,3 +27,182 @@ review:
   - Does not use the highcharts-core Python library as recommended in library rules
   - Error bars not included despite spec mentioning them as valuable for ensemble
     methods
+  image_description: The plot displays a horizontal bar chart showing feature importances
+    from a Random Forest model for house price prediction. There are 15 features displayed,
+    sorted from highest importance at the top (Square Footage with 0.215) to lowest
+    at the bottom (Energy Efficiency Score with 0.004). The bars use a gradient color
+    scheme from dark blue (#306998 for high importance) to light blue (#a8d5f2 for
+    low importance). Each bar has a data label showing the precise importance value
+    positioned just to the right of the bar. The title "bar-feature-importance · highcharts
+    · pyplots.ai" is centered at the top with a subtitle "House Price Prediction -
+    Random Forest Feature Importances" below it. The x-axis shows "Importance Score"
+    ranging from 0 to 0.25. Feature names (e.g., "Square Footage", "Number of Bedrooms",
+    "Location Score") are clearly displayed on the y-axis. The chart has a clean white
+    background with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and feature names are all clearly readable
+          with appropriate font sizes for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names are well-spaced, data labels do
+          not overlap bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, gradient colors clearly distinguish importance
+          levels
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue gradient is colorblind-safe, good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though the right side has significant whitespace
+          due to the x-axis extending to 0.25 while max value is 0.215
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Horizontal value axis has "Importance Score" label but no units
+          (importance scores are unitless, so this is acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend disabled (appropriate for
+          single series) but x-axis tick labels are overly dense
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted bars, gradient coloring, data labels for precision all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 features visible with appropriate axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series chart (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 15 features with good variation in importance values; however,
+          spec mentions optional error bars for ensemble methods which are not shown
+          despite the data being described as "from ensemble averaging"
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: House price prediction is a classic ML use case, features are realistic
+          and meaningful
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Importance values sum close to 1.0 (0.997), which is realistic; however
+          individual values could show more spread
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure but contains a helper function `importance_to_color()`
+          which slightly violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed as data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts bar chart with data labels and custom styling; however,
+          does not use highcharts-core Python library as suggested in the library
+          rules, instead builds JSON config manually
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/letsplot.yaml b/plots/bar-feature-importance/metadata/letsplot.yaml
index 80df0cd769..4b4ef3769c 100644
--- a/plots/bar-feature-importance/metadata/letsplot.yaml
+++ b/plots/bar-feature-importance/metadata/letsplot.yaml
@@ -24,3 +24,178 @@ review:
   weaknesses:
   - Legend is positioned far from the main plot area, creating visual disconnect
   - Missing random seed even though data is deterministic (minor but good practice)
+  image_description: The plot displays a horizontal bar chart showing feature importance
+    scores for a loan default prediction model. There are 15 features shown, sorted
+    from highest importance (income at 0.182) to lowest (region at 0.003). The bars
+    use a sequential color gradient from light blue (#A8D5E5) for low importance to
+    dark blue (#306998) for high importance. Each bar has error bars (standard deviation)
+    shown in dark gray, and importance values are annotated at the end of each bar.
+    The title "bar-feature-importance · letsplot · pyplots.ai" is at the top, the
+    y-axis is labeled "Feature", and the x-axis is labeled "Importance Score". A color
+    legend for the gradient is positioned on the right side. The layout is clean with
+    subtle grid lines on the x-axis only.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and clearly readable, axis labels and tick marks are
+          all appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; feature names, values, and error bars
+          are well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized for the 15 features, error bars are
+          visible and not overwhelming
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue gradient is colorblind-safe, good contrast against
+          white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, but legend could be positioned closer to
+          the plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Feature" and "Importance Score"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle, but legend is placed far from the data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has sorted bars, color gradient, error bars, text annotations - all
+          spec requirements met
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, x-axis range appropriate (0 to ~0.22)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows importance gradient
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "bar-feature-importance · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full range from high importance (income 0.182) to near-zero
+          (region 0.003), with varying error bar sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Loan default prediction is a realistic ML use case with plausible
+          feature names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values sum to ~1.0, realistic for Random Forest feature
+          importances
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (data is deterministic, but good practice to include)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern lets-plot API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct scale factor
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, coord_flip, scale_fill_gradient, and theme customization;
+          HTML export for interactivity. Could use more advanced lets-plot-specific
+          features.
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/matplotlib.yaml b/plots/bar-feature-importance/metadata/matplotlib.yaml
index c7ee1bef1a..7892bf98d6 100644
--- a/plots/bar-feature-importance/metadata/matplotlib.yaml
+++ b/plots/bar-feature-importance/metadata/matplotlib.yaml
@@ -25,3 +25,173 @@ review:
   - Axis labels could include units (e.g., "Importance Score (normalized)" or similar)
   - Could leverage matplotlib annotate() for more styled value labels with arrows
     or backgrounds
+  image_description: The plot displays a horizontal bar chart showing feature importances
+    from a machine learning model (credit/loan prediction context). The chart shows
+    12 features sorted by importance from bottom to top, with "Income" being the most
+    important (0.182) and "Number of Dependents" the least important (0.015). The
+    bars use a sequential blue color gradient (Blues colormap) that transitions from
+    lighter blue for lower values to darker blue for higher importance values. Each
+    bar has error bars showing standard deviation and value annotations displayed
+    to the right of each bar. The title uses the correct format "bar-feature-importance
+    · matplotlib · pyplots.ai". The x-axis is labeled "Importance Score" and y-axis
+    is labeled "Feature". A subtle dashed grid is visible on the x-axis only. The
+    top and right spines are hidden for a cleaner appearance.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all feature names fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, error bars clearly visible with good capsize
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Blues colormap which is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Importance Score", "Feature") but missing
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3, dashed), but there is no legend (not needed
+          here)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted bars, color gradient, error bars, value annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range from 0 to beyond max importance + std
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-feature-importance · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 12 features with good variation in importance values, but all
+          features trend consistently (could show some mid-range clustering)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit/loan prediction model with realistic feature names (Income,
+          Credit Score, Age, etc.)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values sum to ~1.0, realistic for sklearn feature_importances_
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses barh with error bars, colormap, spine customization. Good but
+          could use additional matplotlib features like annotate() for richer annotations
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/plotly.yaml b/plots/bar-feature-importance/metadata/plotly.yaml
index 0c7661e8a5..75da7e19af 100644
--- a/plots/bar-feature-importance/metadata/plotly.yaml
+++ b/plots/bar-feature-importance/metadata/plotly.yaml
@@ -25,3 +25,180 @@ review:
   - Y-axis label "Feature" could be omitted since the feature names are self-explanatory
   - Could leverage more Plotly-specific interactive features in the HTML output (hover
     templates, custom interactions)
+  image_description: 'The plot displays a horizontal bar chart showing 15 machine
+    learning feature importances from a credit/loan prediction model. Features are
+    sorted by importance with "age" at the top (0.179) and "investment_portfolio"
+    at the bottom (0.005). The bars use a blue gradient color scheme (RGBA with varying
+    alpha from ~0.4 to 1.0) where darker/more opaque blue indicates higher importance.
+    Each bar includes error bars (horizontal lines) representing standard deviation
+    from ensemble methods. Importance values are annotated as text labels at the end
+    of each bar after the error bars. The y-axis shows feature names, x-axis shows
+    "Importance Score" ranging from 0 to ~0.2. The title follows the correct format:
+    "bar-feature-importance · plotly · pyplots.ai". Layout is clean with white background,
+    subtle gray grid lines, and proper margins allowing all feature names to be fully
+    visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt, annotations
+          at 16pt - all clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names are well-spaced, annotations positioned
+          after error bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized for 15 features, error bars visible
+          with good thickness
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single-hue blue gradient is colorblind-safe, no red-green distinction
+          needed
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though left margin is generous; plot fills
+          ~60% of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Importance Score" is descriptive but lacks units (though importance
+          scores are typically unitless)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal bars, sorted by importance,
+          color gradient, error bars, value annotations'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range including error bars
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-feature-importance · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 15 features with varying importance values and error bars;
+          good distribution from high (0.179) to low (0.005)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit/loan prediction model features (age, income, credit_score,
+          etc.) are realistic and domain-appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values sum to ~1.0, typical for normalized feature importances
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Plotly API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Bar with proper orientation, custom annotations, error_x
+          for error bars. Could leverage more Plotly-specific features like hover
+          customization in HTML output
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/plotnine.yaml b/plots/bar-feature-importance/metadata/plotnine.yaml
index 9da02ba4de..b299e755ec 100644
--- a/plots/bar-feature-importance/metadata/plotnine.yaml
+++ b/plots/bar-feature-importance/metadata/plotnine.yaml
@@ -22,3 +22,151 @@ review:
   weaknesses:
   - Could add explicit random seed comment for documentation clarity even with deterministic
     data
+  image_description: 'The plot is a horizontal bar chart showing feature importances
+    from a Random Forest model for house price prediction. It displays 15 features
+    sorted by importance value with the highest (Overall Quality at 0.285) at the
+    top and lowest (Porch Area at 0.002) at the bottom. The bars use a sequential
+    color gradient from light blue (#a8d5e5) for lower values to dark blue (#306998)
+    for higher values. Each bar has error bars showing standard deviation from ensemble
+    variability. Importance values are annotated as text to the right of each bar
+    (3 decimal places). The y-axis shows "Feature" with descriptive feature names,
+    the x-axis shows "Importance Score" ranging from 0.0 to ~0.3. The title follows
+    the required format: "bar-feature-importance · plotnine · pyplots.ai". The layout
+    uses a minimal theme with subtle dashed grid lines on the x-axis only.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis titles at 20pt bold, axis text at 14-16pt,
+          all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, feature names are readable and well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate width (0.7), error bars visible
+          with good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue gradient is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of 16:9 aspect
+          ratio
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Feature" and "Importance Score"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by importance (highest at top), color gradient, error bars,
+          text annotations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range with expand=(0, 0, 0.15, 0) to accommodate
+          labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly hidden as color maps to same value as bar length
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bar-feature-importance · plotnine · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 15 features with varying importance values from 0.002 to 0.285,
+          good distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: House price prediction with sklearn RandomForestClassifier is a real,
+          comprehensible ML scenario
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/pygal.yaml b/plots/bar-feature-importance/metadata/pygal.yaml
index de6f2563d4..13643040f3 100644
--- a/plots/bar-feature-importance/metadata/pygal.yaml
+++ b/plots/bar-feature-importance/metadata/pygal.yaml
@@ -24,3 +24,168 @@ review:
   - Contains a helper function importance_to_color which deviates from strict KISS
     script style
   - X-axis could be tighter to data range to reduce unused whitespace on right side
+  image_description: The plot shows a horizontal bar chart of feature importances
+    from a house price prediction model. There are 15 features displayed on the y-axis
+    (OverallQual at top, ExterQual at bottom), sorted by importance with highest at
+    top. The x-axis shows "Importance Score" ranging from 0.000 to 0.240. Bars use
+    a gradient from light blue (low importance) to darker Python Blue (high importance).
+    Each bar has its importance value displayed at the end (e.g., 0.245 for OverallQual,
+    0.182 for GrLivArea). The title reads "bar-feature-importance · pygal · pyplots.ai".
+    The layout has white background with subtle vertical grid lines, and all feature
+    names are clearly readable on the left side.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, labels, and tick marks all clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names and values all distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars appropriately sized for 15 features, good spacing between bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue gradient is colorblind-safe, no red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, but the x-axis extends beyond the data range (max
+          importance ~0.24, axis goes past 0.24), leaving some unused space on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label "Importance Score" but no units (dimensionless
+          is acceptable for importance)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid lines are subtle, no legend needed (single series), but grid
+          extends into unused space
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance values as bar length
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by importance (highest at top), gradient coloring, value annotations
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 features visible, axis accommodates all values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-feature-importance · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 15 features with varied importance values demonstrating the
+          concept well, but spec suggests error bars for ensemble methods (optional,
+          not required)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: House price prediction with RandomForest is a realistic, comprehensible
+          ML scenario using actual Kaggle Ames Housing dataset features
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values sum to ~1.0, realistic for tree-based feature importance
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains a helper function `importance_to_color` which violates strict
+          KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-feature-importance/metadata/seaborn.yaml b/plots/bar-feature-importance/metadata/seaborn.yaml
index efacb6cda7..748f3520d6 100644
--- a/plots/bar-feature-importance/metadata/seaborn.yaml
+++ b/plots/bar-feature-importance/metadata/seaborn.yaml
@@ -22,3 +22,172 @@ review:
   weaknesses:
   - Error bars are added manually via matplotlib instead of leveraging seaborn built-in
     errorbar capabilities
+  image_description: The plot displays a horizontal bar chart showing feature importances
+    from a machine learning credit scoring model. There are 15 features sorted by
+    importance, with "Annual Income" at the top (0.180) and "Previous Defaults" at
+    the bottom (0.007). The bars use a sequential "Blues" color palette that transitions
+    from light blue (low importance) to dark blue (high importance). Each bar has
+    error bars (whiskers) showing standard deviation, and importance values are annotated
+    in blue text at the end of each bar. The title reads "bar-feature-importance ·
+    seaborn · pyplots.ai" in bold at the top. The x-axis is labeled "Feature Importance"
+    and the y-axis is labeled "Feature". A subtle dashed grid is visible on the x-axis
+    only, and the top/right spines are removed for a cleaner look.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, error bars clearly visible with appropriate
+          caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential Blues palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units (importance is unitless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3), no legend needed for this plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for feature importance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on Y-axis, importance on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted bars, color gradient, error bars,
+          value annotations'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed (N/A); colors are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: `bar-feature-importance · seaborn · pyplots.ai`'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full range from high (0.18) to low (0.007) importance with
+          good variability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit scoring model is a real, comprehensible ML scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values sum to ~1.0, realistic for tree-based models
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses `sns.barplot` but error bars added via matplotlib's `ax.errorbar`
+          rather than using seaborn's built-in capabilities
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/altair.yaml b/plots/bar-grouped/metadata/altair.yaml
index c5fbad2fa9..e5eb2f8c69 100644
--- a/plots/bar-grouped/metadata/altair.yaml
+++ b/plots/bar-grouped/metadata/altair.yaml
@@ -24,3 +24,177 @@ review:
   weaknesses:
   - Legend placement in top-right overlaps slightly with the plot area; consider orient
     right outside the plot
+  image_description: 'The plot displays a grouped bar chart showing quarterly revenue
+    by product line. There are 4 quarters (Q1-Q4) on the x-axis, each with 3 side-by-side
+    bars representing Software (dark blue #306998), Hardware (yellow #FFD43B), and
+    Services (teal #4ECDC4). The y-axis shows "Revenue (thousands USD)" ranging from
+    0 to 180. The title "bar-grouped · altair · pyplots.ai" appears at the top center.
+    A legend labeled "Product Line" is positioned in the top-right corner. The bars
+    have subtle rounded corners at the top. Software consistently has the highest
+    revenue across all quarters (120→145→132→168), Hardware is mid-range (85→78→92→105),
+    and Services is lowest but growing (45→52→68→75). The grid is very subtle with
+    low opacity.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing, xOffset creates clear grouped
+          separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and teal are colorblind-safe (no red-green issues)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though legend in top-right slightly overlaps grid
+          area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Revenue (thousands USD)",
+          X-axis labeled "Quarter"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle (alpha 0.3), but legend overlaps the
+          data area slightly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart with side-by-side bars per category
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Quarter on X, Revenue on Y, Product as grouping variable
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors, clear legend, consistent bar widths, good spacing
+          between groups
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range 0-180, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Software, Hardware, Services
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-grouped · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across quarters and between products, clear trends
+          visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product line is a real business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values 45-168 thousands USD are realistic for product lines
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → chart → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded) with no random generation; the
+          code does not use np.random.seed because there is no random data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses xOffset for grouping (Altair-specific), cornerRadius for styling,
+          tooltips for interactivity, declarative encoding. Could have added selection/interactivity
+          features.
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/bokeh.yaml b/plots/bar-grouped/metadata/bokeh.yaml
index 1471f003de..0420aed6fd 100644
--- a/plots/bar-grouped/metadata/bokeh.yaml
+++ b/plots/bar-grouped/metadata/bokeh.yaml
@@ -21,3 +21,174 @@ review:
   weaknesses:
   - Legend color swatches all show the same blue color instead of the actual bar colors
   - No np.random.seed(42) even though data is deterministic
+  image_description: 'The plot displays a grouped bar chart showing quarterly revenue
+    (Q1-Q4) for three product lines: Electronics (blue), Clothing (yellow), and Home
+    & Garden (green). Each quarter has three side-by-side bars representing the different
+    product categories. Value labels appear above each bar (e.g., "$245K", "$180K").
+    The title reads "Quarterly Revenue by Product · bar-grouped · bokeh · pyplots.ai"
+    at the top. The y-axis is labeled "Revenue ($ Thousands)" with tick marks at 0,
+    100, 200, 300, 400. The x-axis shows "Quarter" with Q1, Q2, Q3, Q4 labels. A legend
+    in the top-right corner identifies the three product categories. The background
+    is light gray (#fafafa) with subtle dashed horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and value labels are clearly readable; tick labels
+          could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, green palette is distinguishable but yellow-green may
+          be challenging for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("Revenue ($ Thousands)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows all items with same blue color instead of their actual
+          colors
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, groups side-by-side
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, legend, consistent spacing,
+          value labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data (0-430 accommodates max value of 385)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match the data groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{description} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across quarters and products; different trends for
+          each product line
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product line is a common real-world business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $125K-$385K range are realistic for product revenue
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed used (though data is deterministic, best practice
+          is to include seed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses FactorRange for grouped categorical axis and factor_cmap for
+          coloring, but could leverage more Bokeh-specific features like HoverTool
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/highcharts.yaml b/plots/bar-grouped/metadata/highcharts.yaml
index 36d7285601..2d9d3cee08 100644
--- a/plots/bar-grouped/metadata/highcharts.yaml
+++ b/plots/bar-grouped/metadata/highcharts.yaml
@@ -26,3 +26,177 @@ review:
     marks), making the grid overly dense
   - All three product lines show uniform upward trends - data could demonstrate more
     varied patterns (e.g., one declining, seasonal variations)
+  image_description: 'The plot displays a grouped bar chart with 4 quarters (Q1, Q2,
+    Q3, Q4) on the x-axis and revenue values on the y-axis. Three product lines are
+    shown as grouped vertical bars: Electronics (blue #306998), Clothing (yellow #FFD43B),
+    and Home & Garden (purple #9467BD). The title "bar-grouped · highcharts · pyplots.ai"
+    appears prominently at the top with a subtitle "Quarterly Revenue by Product Line
+    (in thousands USD)". Each bar has data labels showing values (e.g., $245K, $312K,
+    $287K, $398K for Electronics). The y-axis displays "Revenue (thousands USD)" with
+    formatted tick labels ($0K to $420K in $10K increments), and the x-axis shows
+    "Quarter". A boxed legend with the three product lines is positioned in the top-right
+    corner with a subtle border and shadow.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (72px), axis labels at 48px, tick labels
+          at 36px - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, bars well-spaced, data labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good groupPadding and pointPadding
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Purple palette is colorblind-safe, avoids red-green
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but y-axis has excessive tick marks (every $10K
+          creates visual clutter)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Quarter" and "Revenue
+          (thousands USD)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid lines are dashed but overly dense due to $10K intervals; legend
+          is well-placed but grid dominates
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar (column) chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (quarters) on X, values on Y, groups (products) as series
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors, clear legend, consistent bar widths, adequate spacing,
+          value labels present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to above max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match series names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bar-grouped · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows quarterly comparison with 3 groups, demonstrates growth trends,
+          but could show more variation (all products trend upward)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product line is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $156K-$398K range are realistic for quarterly revenue
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: 'No random seed, but data is deterministic (hardcoded arrays) - PARTIAL:
+          data is fixed but no explicit seed documentation'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses Highcharts-specific features: column chart type, dataLabels
+          with formatting, gridLineDashStyle, floating legend with shadow, subtitle,
+          formatted y-axis labels with ${value}K'
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/letsplot.yaml b/plots/bar-grouped/metadata/letsplot.yaml
index 1ccdca6ef5..9e859a4881 100644
--- a/plots/bar-grouped/metadata/letsplot.yaml
+++ b/plots/bar-grouped/metadata/letsplot.yaml
@@ -27,3 +27,179 @@ review:
     balanced
   - Could add value labels on bars for precise comparisons as noted in spec (optional
     but helpful)
+  image_description: 'The plot displays a grouped bar chart showing quarterly revenue
+    data for three product categories. The chart has a clean white/light gray background
+    with a minimal theme. Four quarters (Q1-Q4) are shown on the x-axis, with three
+    bars per quarter representing Electronics (blue #306998), Clothing (yellow #FFD43B),
+    and Home & Garden (red #DC2626). The y-axis shows "Revenue ($ thousands)" ranging
+    from 0 to 240. The title "bar-grouped · letsplot · pyplots.ai" appears at the
+    top in bold. A legend labeled "Product Category" is positioned on the right side.
+    The bars show clear patterns: Electronics shows consistent growth (145→168→192→235),
+    Clothing shows seasonal variation (98→112→87→142), and Home & Garden peaks in
+    summer (67→95→108→72). Bar spacing and grouping are well-balanced.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and ~24pt, axis titles ~20pt, tick labels ~16pt, all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean spacing throughout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good width (0.7) and alpha (0.9), clear
+          visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/red palette is distinguishable, though red could be problematic
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "Revenue ($ thousands)", X-axis "Quarter" is
+          descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: The x-grid lines are removed (panel_grid_major_x=element_blank());
+          only a subtle y-grid remains
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart with position="dodge"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (quarters) on x-axis, groups (products) as fill, values
+          as bar heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors per group, clear legend, consistent bar widths, good
+          spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to 240, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled "Product Category" with accurate color mappings
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bar-grouped · letsplot · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows growth trends (Electronics), seasonal patterns (Clothing),
+          and peaked behavior (Home & Garden), good variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in $thousands (67-235) are plausible for retail, though
+          scale context could be clearer
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and lets_plot used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html, but path="." could be cleaner
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar correctly with geom_bar, position_dodge, scale_fill_manual,
+          theme_minimal, and ggsize. Uses lets-plot's HTML export capability. Could
+          leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/matplotlib.yaml b/plots/bar-grouped/metadata/matplotlib.yaml
index ab6ae56a84..4d1e1cc7f0 100644
--- a/plots/bar-grouped/metadata/matplotlib.yaml
+++ b/plots/bar-grouped/metadata/matplotlib.yaml
@@ -21,3 +21,172 @@ review:
   weaknesses:
   - Legend position in upper left may conflict with data if values were higher in
     Q1 (upper right or outside plot would be safer)
+  image_description: The plot displays a grouped bar chart with quarterly sales data
+    (Q1-Q4) for three product categories. Electronics is shown in Python Blue (#306998),
+    Clothing in Python Yellow (#FFD43B), and Home & Garden in green (#4CAF50). Each
+    category group contains three side-by-side bars with value labels displayed above
+    each bar (e.g., 245, 178, 125 for Q1). The title "bar-grouped · matplotlib · pyplots.ai"
+    is centered at the top. The y-axis label reads "Sales (Thousands USD)" with values
+    from 0 to ~450, and the x-axis shows "Quarter" with Q1-Q4 labels. A legend in
+    the upper left corner identifies each product line. Subtle dashed grid lines appear
+    on the y-axis with good transparency.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, bars well-spaced, value labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars sized appropriately with good spacing between groups
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Green palette is colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "(Thousands USD)", X-axis descriptive "Quarter"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid at alpha=0.3 is good, but legend could be positioned in a less
+          data-relevant area (upper right would be better as it doesn't interfere
+          with potential higher values)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, groups as side-by-side bars, values as heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors, legend, consistent spacing, value labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, includes headroom for labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series exactly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "bar-grouped · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied patterns: Electronics peaks in Q4, Clothing grows steadily,
+          Home & Garden peaks mid-year'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product line is a common business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in hundreds of thousands USD are realistic for retail sales
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses `strict=True` in zip which is Python 3.10+ only (minor compatibility
+          concern)
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' correctly
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of bar positioning with np.arange offsets, ax.annotate for
+          value labels, edge styling with white borders
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/plotly.yaml b/plots/bar-grouped/metadata/plotly.yaml
index 8d4255b7b6..c7acc3ba13 100644
--- a/plots/bar-grouped/metadata/plotly.yaml
+++ b/plots/bar-grouped/metadata/plotly.yaml
@@ -22,3 +22,154 @@ review:
   weaknesses:
   - Grid alpha at 0.1 is slightly too subtle - 0.2-0.3 would aid value reading
   - Does not leverage plotly-specific features like hover templates with custom formatting
+  image_description: The plot displays a grouped bar chart showing quarterly revenue
+    data (Q1-Q4) for three product lines. Electronics is shown in dark blue (#306998),
+    Clothing in golden yellow (#FFD43B), and Home & Garden in coral/salmon (#E17055).
+    Bars are grouped side-by-side for each quarter with value labels positioned above
+    each bar. The title "bar-grouped · plotly · pyplots.ai" is centered at the top
+    with a horizontal legend below it showing all three product categories. The y-axis
+    shows "Revenue ($ thousands)" ranging from 0 to ~400, and the x-axis shows "Quarter"
+    with Q1-Q4 labels. The plot uses a clean white template with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt, value labels
+          at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, bars well-spaced, value labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good spacing between groups
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral are colorblind-safe and have excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Revenue ($ thousands)", X-axis has descriptive
+          label "Quarter"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart with side-by-side bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (quarters) on X, groups (products) as separate traces,
+          values as bar heights
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors, clear legend, consistent bar widths, value labels
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0 and shows all data including highest value (398)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all three product categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-grouped · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variation: Electronics consistently highest, different growth
+          patterns per product, Q4 peaks for Electronics/Clothing, Q2 dip for Clothing'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product line is a real business scenario with
+          plausible values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values in $thousands (98-398K) are realistic for business
+          context
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → figure → layout → save structure, no functions/classes
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/plotnine.yaml b/plots/bar-grouped/metadata/plotnine.yaml
index 38d11ceb5a..667ed48020 100644
--- a/plots/bar-grouped/metadata/plotnine.yaml
+++ b/plots/bar-grouped/metadata/plotnine.yaml
@@ -26,3 +26,178 @@ review:
     for colorblind users; consider using more distinct hues'
   - Does not leverage plotnine-specific features like scale_fill_brewer for built-in
     colorblind-safe palettes
+  image_description: 'The plot displays a grouped bar chart showing quarterly revenue
+    by product line. There are 4 quarters (Q1-Q4) on the x-axis, with 3 bars per quarter
+    representing Hardware (dark blue #306998), Services (yellow #FFD43B), and Software
+    (light blue #4B8BBE). The y-axis shows "Revenue ($ millions)" ranging from 0 to
+    ~165. The title "bar-grouped · plotnine · pyplots.ai" is displayed at the top.
+    A legend labeled "Product Line" appears on the right side. The plot uses a minimal
+    theme with subtle gray gridlines. Software consistently shows the highest revenue,
+    growing from ~120 in Q1 to ~165 in Q4, while Hardware remains relatively stable
+    (78-92) and Services grows modestly (45-70).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean spacing throughout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with good spacing between
+          groups
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/light blue palette is distinguishable but the two blues
+          could be better differentiated for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "Revenue ($ millions)", X-axis "Quarter" is
+          descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but legend shows categories in alphabetical
+          order (Hardware, Services, Software) rather than the order they appear in
+          the bars (Software, Hardware, Services), which creates confusion when reading
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (Quarter) on x-axis, values (Revenue) on y-axis, groups
+          (Product) differentiated by color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side bars, distinct colors, clear legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "bar-grouped · plotnine · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across quarters and between product lines, demonstrates
+          clear patterns (Software growth, Hardware stability), but all products show
+          similar upward trend
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product line is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values in tens of millions are realistic for a business context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic ggplot usage with geom_bar and position_dodge; could leverage
+          plotnine's grammar more (e.g., faceting, stat transformations, or scale_fill_brewer
+          for color palettes)
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/pygal.yaml b/plots/bar-grouped/metadata/pygal.yaml
index 871edc7e3f..730fde4df2 100644
--- a/plots/bar-grouped/metadata/pygal.yaml
+++ b/plots/bar-grouped/metadata/pygal.yaml
@@ -21,3 +21,178 @@ review:
   weaknesses:
   - Legend placement in top-left could be moved to a less intrusive position
   - Font sizes deviate from library style guide recommendations
+  image_description: 'The plot displays a grouped bar chart showing quarterly revenue
+    data for three product lines. Four quarters (Q1-Q4) are shown on the x-axis with
+    three bars per quarter representing Software (blue #306998), Hardware (yellow
+    #FFD43B), and Services (coral/pink #FF6B6B). The y-axis shows Revenue in millions
+    of dollars ranging from $0.0M to $6.0M. Each bar has a value label on top (e.g.,
+    $4.2M, $5.1M, $6.3M). The legend is positioned in the top-left corner. The title
+    "bar-grouped · pygal · pyplots.ai" appears at the top. The background is white
+    with subtle horizontal grid lines, and the overall layout is clean and well-proportioned.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and value labels are all clearly readable. Font
+          sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are cleanly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing between groups and within groups.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral provide excellent contrast and are colorblind-friendly
+          (no red-green only distinction).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; plot fills majority of space. Legend placement
+          in top-left is functional but slightly crowds the first data point area.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Revenue ($M)" and
+          "Quarter".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. However, legend position overlaps
+          with the plot area margin and could be better placed.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped bar chart implementation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, groups as separate series, values as bar heights.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, clear legend, consistent
+          bar widths, value labels on bars.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to above maximum value.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three product lines.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format.
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across quarters and between product lines. Software
+          shows growth trend, all series show different patterns.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product line is a real, comprehensible business
+          scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values in $2.5M-$6.3M range are realistic for product line
+          quarterly revenue.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values); no random seed is needed
+          since no random data is used. Minor deduction for not using numpy/random
+          at all.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported, both are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization, print_values with custom formatter,
+          SVG/PNG dual output. Could leverage more advanced features like tooltips
+          configuration or animation settings.
+  verdict: APPROVED
diff --git a/plots/bar-grouped/metadata/seaborn.yaml b/plots/bar-grouped/metadata/seaborn.yaml
index 324b14bf23..ccd99d8fe4 100644
--- a/plots/bar-grouped/metadata/seaborn.yaml
+++ b/plots/bar-grouped/metadata/seaborn.yaml
@@ -22,3 +22,159 @@ review:
   weaknesses:
   - Legend positioned in upper left partially overlaps with Q1 bars; consider placing
     outside plot or in upper right
+  image_description: 'The plot displays a grouped bar chart showing quarterly sales
+    data (Q1-Q4) for three product lines: Electronics (dark blue), Clothing (golden
+    yellow), and Home & Garden (teal). Each quarter has three bars side-by-side. The
+    y-axis shows "Sales (thousands $)" ranging from 0 to 400, and the x-axis shows
+    quarters. There''s a clear upward trend for Electronics sales across quarters
+    (245→280→310→395). The title "bar-grouped · seaborn · pyplots.ai" is centered
+    at the top. A legend titled "Product Line" is positioned in the upper left corner.
+    Subtle dashed horizontal grid lines appear in the background.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars well-sized with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: colorblind-safe blue/yellow/teal palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units, X-axis is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: legend position in upper left slightly overlaps with Q1 data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct grouped bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: categories on X, values on Y, groups as hue
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: distinct colors, clear legend, consistent spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: labels match data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows variation across quarters and products with clear trends
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: quarterly sales by product line is plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: values in realistic range for sales data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: deterministic data but no explicit seed marker
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/altair.yaml b/plots/bar-horizontal/metadata/altair.yaml
index 933a899733..4978ae6ce8 100644
--- a/plots/bar-horizontal/metadata/altair.yaml
+++ b/plots/bar-horizontal/metadata/altair.yaml
@@ -28,3 +28,171 @@ review:
     attention
   - Could leverage more Altair-specific interactivity features like selection/highlighting
     in HTML version
+  image_description: The plot displays a horizontal bar chart showing the "Top 10
+    programming languages by popularity." Python sits at the top with the longest
+    bar (~28.5%), followed by JavaScript (~18.2%), Java (~15.8%), and descending to
+    Kotlin at the bottom (~1.6%). All bars use a consistent blue color (#306998 -
+    Python Blue) with subtle rounded corners on the right end. The chart has a light
+    dashed grid (alpha ~0.3) for reference. The y-axis shows "Programming Language"
+    with all 10 language names clearly readable. The x-axis shows "Popularity (%)"
+    ranging from 0 to 30. The title "bar-horizontal · altair · pyplots.ai" is centered
+    at the top. The layout is well-balanced with good use of canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all category names clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths are well-proportioned for 10 categories, appropriate spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, excellent contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well (~60-70%), balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Popularity (%)" includes units, "Programming Language" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), but no legend needed for single-color
+          chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis (correct horizontal orientation)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by value (largest to smallest), consistent bar heights, single
+          color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range (0-30), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single color, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bar-horizontal · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows ranking well, but all bars same color (no highlighted bar as
+          spec suggests could be done)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, values are realistic for language popularity
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding, tooltips, cornerRadiusEnd for
+          rounded bars, but could leverage more interactivity features
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/bokeh.yaml b/plots/bar-horizontal/metadata/bokeh.yaml
index a3992201e5..578d6b7e18 100644
--- a/plots/bar-horizontal/metadata/bokeh.yaml
+++ b/plots/bar-horizontal/metadata/bokeh.yaml
@@ -24,3 +24,161 @@ review:
   weaknesses:
   - Some unused canvas space on the right side could be better utilized
   - Y-axis label is missing (could add Programming Language label for completeness)
+  image_description: The plot displays a horizontal bar chart showing "Top Programming
+    Languages by Developer Popularity (%)" with 10 programming languages. The bars
+    are rendered in a consistent steel blue color (#306998) with a darker border.
+    Languages are sorted from bottom (Kotlin at 9.2%) to top (JavaScript at 65.6%),
+    creating a clear visual ranking. The title "bar-horizontal · bokeh · pyplots.ai"
+    is centered at the top. The x-axis is labeled "Developer Popularity (%)" with
+    a range from 0 to 70. Category labels (JavaScript, Python, TypeScript, Java, C#,
+    C++, PHP, Go, Rust, Kotlin) appear on the y-axis. The grid lines are subtle and
+    dashed, appearing only on the x-axis. The overall layout is clean with good spacing
+    between bars.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text readable, good font sizes for large canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars well-proportioned with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: single blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: functional but some empty space on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive label with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: subtle grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: categories on Y, values on X
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: sorted bars, consistent heights, single color, good spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: appropriate axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows full range of values with good variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: real programming language popularity data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic percentage values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: deterministic data, no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/highcharts.yaml b/plots/bar-horizontal/metadata/highcharts.yaml
index 666efb22f8..a4e2a99e97 100644
--- a/plots/bar-horizontal/metadata/highcharts.yaml
+++ b/plots/bar-horizontal/metadata/highcharts.yaml
@@ -23,3 +23,177 @@ review:
   weaknesses:
   - Y-axis title could be more prominent or positioned better
   - X-axis tick labels every 2% create visual clutter; every 10% or 20% would be cleaner
+  image_description: The plot displays a horizontal bar chart showing "Top 10 Countries
+    by Renewable Energy Share (%)" as the subtitle. The title reads "bar-horizontal
+    · highcharts · pyplots.ai" in bold at the top. Ten blue bars (#306998) extend
+    horizontally from left to right, with country names on the y-axis (Switzerland
+    at top, Iceland at bottom - sorted ascending by value). Each bar has a data label
+    showing the percentage value (e.g., "85%" for Iceland, "38.5%" for Switzerland).
+    The x-axis shows percentage values from 0% to 100% with tick marks every 2%. The
+    background is white with subtle gray gridlines. The layout is clean with good
+    spacing between bars.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, subtitle at 32px, axis labels at 28px/24px - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with appropriate padding and spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins and canvas utilization, slight excess whitespace at
+          bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label "Renewable Energy Share (%)"; no
+          x-axis category title is needed
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.3 equivalent), legend disabled which is appropriate
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values as bar lengths - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted bars, consistent bar heights, data labels, single color, adequate
+          spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-100%, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-horizontal · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 10 categories with varying values demonstrating ranking visualization;
+          could show more value diversity
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Renewable energy percentages by country is a realistic, relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages range from 38.5% to 85%, realistic for renewable energy
+          data
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart setup → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data); the data is hardcoded,
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts bar series with data labels, custom styling, and
+          proper chart export via Selenium; could leverage more Highcharts-specific
+          features like tooltips or animations
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/letsplot.yaml b/plots/bar-horizontal/metadata/letsplot.yaml
index 3cec5fe4b6..c5c6c73672 100644
--- a/plots/bar-horizontal/metadata/letsplot.yaml
+++ b/plots/bar-horizontal/metadata/letsplot.yaml
@@ -22,3 +22,167 @@ review:
   weaknesses:
   - Grid customization could be slightly more refined with lower alpha for vertical
     grid lines
+  image_description: 'The plot displays a horizontal bar chart showing programming
+    language popularity among developers. Ten programming languages are listed on
+    the y-axis (from bottom to top: Rust, Go, C, PHP, C++, C#, TypeScript, Java, Python,
+    JavaScript), sorted by popularity in ascending order. The x-axis shows "Developers
+    (%)" ranging from 0 to 65. All bars use a consistent steel blue color (#306998).
+    JavaScript leads at ~65%, followed by Python at ~49%. The title "bar-horizontal
+    · letsplot · pyplots.ai" appears at the top in bold. The chart uses a minimal
+    theme with light gray vertical grid lines and no horizontal grid lines, providing
+    a clean, professional appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title bold ~24pt, axis labels ~20pt, tick labels ~16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with width=0.7 and alpha=0.9
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998), colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Programming Language" and "Developers (%)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Vertical grid subtle, no legend needed for single-color bars
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by value, single color, consistent bar heights, adequate spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-color bars
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "bar-horizontal · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 10 categories with good value variation (11.76 to 65.36)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real programming language survey data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic percentages matching real-world surveys
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and lets_plot
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: ggplot2 grammar, coord_flip, theme_minimal, ggsize, scale parameter
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/matplotlib.yaml b/plots/bar-horizontal/metadata/matplotlib.yaml
index bd8aa778f9..44e67c225a 100644
--- a/plots/bar-horizontal/metadata/matplotlib.yaml
+++ b/plots/bar-horizontal/metadata/matplotlib.yaml
@@ -22,3 +22,174 @@ review:
   - Could highlight the top 1-3 languages with a different color to draw attention
     as suggested in spec
   - Bar edge color adds visual complexity without clear benefit
+  image_description: The plot displays a horizontal bar chart showing "Top 10 programming
+    languages by popularity". The chart uses a blue color (#306998) for all bars with
+    a darker edge. Python is at the top with 68.2%, followed by JavaScript at 62.5%,
+    then Java, C++, TypeScript, C#, Go, Rust, PHP, and Swift at the bottom with 18.3%.
+    Bars are sorted from largest (top) to smallest (bottom). Each bar has a percentage
+    label at its end. The x-axis shows "Popularity (%)" ranging from 0 to 80, and
+    the y-axis shows "Programming Language". The title correctly uses the format "bar-horizontal
+    · matplotlib · pyplots.ai". A subtle dashed grid appears on the x-axis. Top and
+    right spines are removed for a cleaner look.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all category labels and value labels are clearly
+          separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar heights appropriate (0.65), good spacing between bars, value
+          labels clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind issues, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Popularity (%)" with units, Y-axis has descriptive "Programming
+          Language"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but no legend needed/present (acceptable)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart using ax.barh()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted bars, consistent bar heights,
+          value labels at bar ends, adequate spacing'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis extends to 80% (1.2x max value) showing all data clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bar-horizontal · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows ranking effectively with 10 categories, good variety in values,
+          but all bars same color (spec mentions "highlight specific bars" as option)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity is a realistic, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages are realistic and within plausible range for survey data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed for hardcoded
+          data)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ax.barh(), text annotations, spine removal, but no advanced
+          matplotlib features like custom patches or color gradients
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/plotly.yaml b/plots/bar-horizontal/metadata/plotly.yaml
index 0783d1e3c1..b2daa2e290 100644
--- a/plots/bar-horizontal/metadata/plotly.yaml
+++ b/plots/bar-horizontal/metadata/plotly.yaml
@@ -23,3 +23,181 @@ review:
   - Value label font size (32) could be slightly reduced as it appears close to bar
     ends
   - X-axis range padding (1.15x max) creates extra whitespace on right side
+  image_description: The plot displays a horizontal bar chart showing survey results
+    for "What programming language do you use most?". Ten programming languages are
+    listed on the y-axis (Python, JavaScript, TypeScript, Java, C++, Go, Rust, Ruby,
+    PHP, Swift) with horizontal blue bars (#306998) extending to the right representing
+    the number of responses. Each bar has a value label displayed outside at the end.
+    The bars are sorted in descending order by value, with Python having the longest
+    bar (2847 responses) and Swift the shortest (298). The title "bar-horizontal ·
+    plotly · pyplots.ai" is centered at the top. The x-axis shows "Number of Responses"
+    ranging from 0 to ~3000, and the y-axis is labeled "Programming Language". The
+    background uses the plotly_white template with subtle grid lines on the x-axis.
+    Overall layout is clean and well-proportioned.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and value labels are all clearly
+          readable at the 4800x2700 resolution with appropriate font sizes (32-40pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels on y-axis are well-spaced,
+          value labels positioned outside bars with no collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good spacing (bargap=0.3), marker_line
+          adds nice visual definition
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with appropriate margins; slight deduction
+          as some extra whitespace on right due to x-axis range extension
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Number of Responses", "Programming Language")
+          but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.1), no legend needed for single-series chart;
+          y-axis grid is appropriately hidden
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis as expected for horizontal
+          bars
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Consistent bar heights, sorted by value (descending), single color,
+          good spacing, value labels at end of bars
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (0 to max*1.15) shows all data with room for labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series; N/A but appropriate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-horizontal · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows ranking, comparison, and survey result use case well; could
+          show more variation in bar lengths (current distribution is fairly linear
+          decline)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language survey is a perfect, relatable real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response counts (298-2847) are realistic for a developer survey
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded); no np.random.seed is set, though
+          one is not strictly needed here
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects is imported, which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Bar with orientation="h", text labels, template system; generates
+          HTML for interactivity. Could leverage more Plotly features like hover customization
+          or annotations.
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/plotnine.yaml b/plots/bar-horizontal/metadata/plotnine.yaml
index 2bef99b302..d7aba384ca 100644
--- a/plots/bar-horizontal/metadata/plotnine.yaml
+++ b/plots/bar-horizontal/metadata/plotnine.yaml
@@ -22,3 +22,162 @@ review:
   weaknesses:
   - Grid styling uses element_text instead of element_line for panel_grid_major_x
     and panel_grid_minor (should be element_line(color=...))
+  image_description: The plot displays a horizontal bar chart showing "Top 10 programming
+    languages by popularity" with programming languages on the Y-axis and developer
+    usage percentage on the X-axis. JavaScript leads at 65.6%, followed by Python
+    at 49.3%, with Swift at the bottom at 6.6%. The bars are a consistent steel blue
+    color (#306998), extending horizontally from left to right. Each bar has a percentage
+    label positioned just to the right of the bar end. The title "bar-horizontal ·
+    plotnine · pyplots.ai" appears at the top in bold. The minimal theme provides
+    a clean white background with subtle gray grid lines. The layout is well-balanced
+    with the plot filling the canvas appropriately.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title bold ~24pt, axis labels ~20pt, tick labels ~16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text clearly separated, no overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with width=0.7, good spacing, data labels visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no accessibility issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Developer Usage (%)", "Programming Language"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Uses `element_text` instead of `element_line` for grid styling (incorrect
+          API, though visually acceptable)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart using coord_flip()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted bars, consistent heights, value labels, adequate spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-horizontal · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Good value variation (6.6% to 65.6%), properly sorted
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity is a relatable, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages are realistic for developer usage statistics
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic hardcoded data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/pygal.yaml b/plots/bar-horizontal/metadata/pygal.yaml
index 246287e58d..e4d50f80ef 100644
--- a/plots/bar-horizontal/metadata/pygal.yaml
+++ b/plots/bar-horizontal/metadata/pygal.yaml
@@ -27,3 +27,175 @@ review:
     readability
   - Value labels on bars (18.6%, 16.3%, 12.9%) are positioned inside bars making them
     harder to read against the blue background
+  image_description: The plot displays a horizontal bar chart showing programming
+    language popularity survey results. There are 10 horizontal bars in a consistent
+    steel blue color (#306998), sorted from highest (Python at 68.7%) to lowest (Swift
+    at 12.9%). The title "bar-horizontal · pygal · pyplots.ai" appears at the top
+    in dark text. The x-axis shows "Popularity (%)" with tick marks from 0.0% to 60.0%.
+    Each bar has its percentage value displayed at the end. A legend at the bottom
+    shows all 10 programming languages (Python, JavaScript, Java, C++, TypeScript,
+    C#, Go, Rust, PHP, Swift) arranged in two rows of 5 columns. The background is
+    white with subtle vertical grid lines. The bars have good spacing and the overall
+    layout is clean and professional.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; value labels on bars
+          are slightly small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing between them
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (steel blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight issue with no y-axis category labels
+          visible on the bars themselves
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Popularity (%)" with units, but no y-axis label (categories
+          shown in legend instead)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle vertical grid lines; legend at bottom is functional but spreads
+          categories far from their bars
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis as horizontal bar lengths
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has consistent bar heights, sorted by value, single color; missing
+          direct category labels on y-axis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axis extends appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data correctly
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses the correct format, including the center dot (·) separator
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full range of values with clear differentiation between bars
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity is a real, relatable survey scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages are realistic for language popularity surveys
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded), so no explicit seed is needed;
+          minor deduction for not using np.random
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's HorizontalBar, custom Style, print_values, value_formatter,
+          and SVG-based rendering with HTML export; could leverage more interactive
+          features
+  verdict: APPROVED
diff --git a/plots/bar-horizontal/metadata/seaborn.yaml b/plots/bar-horizontal/metadata/seaborn.yaml
index 38c72aab06..12da91c278 100644
--- a/plots/bar-horizontal/metadata/seaborn.yaml
+++ b/plots/bar-horizontal/metadata/seaborn.yaml
@@ -23,3 +23,175 @@ review:
     suggestion for drawing attention to specific categories
   - Uses seaborn primarily as a styling layer over matplotlib rather than leveraging
     seaborn statistical features
+  image_description: The plot displays a horizontal bar chart showing programming
+    language popularity survey results. There are 10 horizontal bars in a consistent
+    steel blue color (#306998), sorted from lowest (Kotlin at 12%) at the top to highest
+    (Python at 68%) at the bottom. Each bar has a percentage label at its end. The
+    y-axis shows "Programming Language" with language names (Kotlin, Swift, Rust,
+    Go, TypeScript, C#, C++, Java, JavaScript, Python), and the x-axis shows "Percentage
+    of Respondents (%)" with values from 0 to 70. The title reads "bar-horizontal
+    · seaborn · pyplots.ai". A subtle dashed grid is visible on the x-axis. Top and
+    right spines are removed for a cleaner look.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is perfectly readable: title at 24pt, axis labels at 20pt,
+          tick labels at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with consistent height, good spacing between
+          categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme avoids colorblind issues entirely
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: X-axis has units "(%)"; Y-axis label "Programming Language" is descriptive,
+          and no unit is needed for a categorical axis
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha 0.3, no legend needed (single color)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted bars, value labels, consistent
+          bar heights, adequate spacing'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis extended to 78 to accommodate labels, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-horizontal · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows ranking/comparison well, sorted from low to high; could have
+          highlighted a specific bar to show that feature
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity survey is a real, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are percentages 12-68%, realistic but total exceeds 100% (multi-select
+          survey implied)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), so no random seed is needed, but
+          the code has no comment noting this (acceptable, but could document)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only used imports: matplotlib.pyplot, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses seaborn's barplot correctly with hue for palette, but doesn't
+          leverage seaborn-specific statistical features like confidence intervals
+          or other advanced capabilities
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/altair.yaml b/plots/bar-permutation-importance/metadata/altair.yaml
index 1a8f7de642..98104f2bca 100644
--- a/plots/bar-permutation-importance/metadata/altair.yaml
+++ b/plots/bar-permutation-importance/metadata/altair.yaml
@@ -26,3 +26,174 @@ review:
   - Missing subtle grid lines that would help readers trace values across the chart
   - Could benefit from adding .interactive() for zoom/pan capabilities on the HTML
     version
+  image_description: The plot displays a horizontal bar chart with 15 housing price
+    model features sorted by permutation importance (highest at top). Location Score
+    (~0.142) and Square Footage (~0.128) are the most important features, shown with
+    darker blue bars. The color gradient uses a sequential blue palette ("blues" scheme),
+    with bars transitioning from dark blue (high importance) to very light blue (near-zero
+    or negative importance). Horizontal error bars extend from each bar showing variability
+    across shuffles. A vertical dashed reference line at x=0 distinguishes positive
+    from negative importance. Two features (Basement Area and Pool Presence) show
+    small negative importance values. The x-axis is labeled "Mean Decrease in Model
+    Score" and y-axis is labeled "Feature". The title correctly uses the format "bar-permutation-importance
+    · altair · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 16-18pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, feature names well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar size (30) appropriate for 15 features, error bars clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some whitespace could be optimized
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Mean Decrease in Model Score", "Feature"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid lines present (would help readability), no legend needed
+          (color legend disabled)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on Y-axis, importance on X-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has error bars, zero line, sorted by importance, color gradient
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including negative values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (legend appropriately hidden for single color scale)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: {spec-id} · {library} · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive, near-zero, and negative importance values; good variety
+          in importance magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Housing price prediction is a well-known ML scenario with realistic
+          feature names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Importance values (0.005-0.142) are realistic for permutation importance;
+          std devs appropriately scaled
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative layering, tooltips, encoding sort field,
+          but could use more interactivity
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/bokeh.yaml b/plots/bar-permutation-importance/metadata/bokeh.yaml
index 2538e281f8..c9eaeea6aa 100644
--- a/plots/bar-permutation-importance/metadata/bokeh.yaml
+++ b/plots/bar-permutation-importance/metadata/bokeh.yaml
@@ -27,3 +27,178 @@ review:
     improve visibility
   - Could leverage Bokeh interactive features like HoverTool to show exact values
     on hover
+  image_description: 'The plot displays a horizontal bar chart showing permutation
+    feature importance for a housing price prediction model. There are 15 features
+    sorted by mean importance (highest at top): Square Footage, Number of Bedrooms,
+    Neighborhood Score, etc. The bars use a blue sequential color gradient from Blues9
+    palette (darker blue = higher importance, lighter blue = lower/negative importance).
+    Each bar has horizontal error bars (whiskers) showing the standard deviation across
+    permutation shuffles. A vertical dashed reference line at x=0 distinguishes positive
+    from negative importance values. The bottom two features (Walk Score, Public Transit
+    Access) have slightly negative importance values extending left of the zero line.
+    The title "bar-permutation-importance · bokeh · pyplots.ai" appears at the top.
+    The x-axis is labeled "Mean Decrease in Model Score" and feature names are displayed
+    on the y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable, slightly larger title would be ideal but very good
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all feature names fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with height=0.7, error bars visible with line_width=3
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blues9 sequential palette is colorblind-safe, good choice, though
+          gradient differences are subtle for middle values
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label but no units (could be "Mean Decrease
+          in R² Score" or similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: X-grid at alpha 0.4 is appropriate, y-grid disabled correctly, no
+          legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for permutation importance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted bars, error bars, color gradient,
+          zero reference line'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including negative values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-permutation-importance · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows range from high importance to negative importance, good variability
+          in std values, could show more extreme negative cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Housing price model is an excellent, neutral, real-world ML scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Importance values 0.18 to -0.008 are realistic for sklearn permutation_importance
+          output, std values proportional
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, linear_cmap transform, Whisker model
+          for error bars - these are Bokeh-specific patterns, but could leverage HoverTool
+          for interactivity
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/highcharts.yaml b/plots/bar-permutation-importance/metadata/highcharts.yaml
index 97946297ec..9c55d6f47b 100644
--- a/plots/bar-permutation-importance/metadata/highcharts.yaml
+++ b/plots/bar-permutation-importance/metadata/highcharts.yaml
@@ -1,6 +1,3 @@
-# Per-library metadata for highcharts implementation of bar-permutation-importance
-# Auto-generated by impl-generate.yml
-
 library: highcharts
 specification_id: bar-permutation-importance
 created: '2025-12-31T10:59:42Z'
@@ -15,5 +12,192 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/bar-permutati
 preview_html: https://storage.googleapis.com/pyplots-images/plots/bar-permutation-importance/highcharts/plot.html
 quality_score: null
 review:
-  strengths: []
-  weaknesses: []
+  strengths:
+  - Excellent use of Highcharts-specific features including error bar series overlay
+    and dynamic color gradient per bar
+  - Clean sequential blue color scheme effectively communicates importance magnitude
+  - Proper implementation of horizontal bar chart with sorted features (most important
+    at top)
+  - Error bars clearly show variability across permutation repetitions
+  - Vertical reference line at x=0 effectively distinguishes positive from negative
+    importance
+  - Good subtitle provides meaningful context about the model being visualized
+  - 'Appropriate margin settings (marginLeft: 300) prevent feature name truncation'
+  weaknesses:
+  - X-axis tick labels at bottom appear small/compressed in the PNG - could benefit
+    from larger font size
+  - Legend is disabled which removes the "Importance" series name context (though
+    acceptable for single series)
+  image_description: The plot is a horizontal bar chart displaying permutation feature
+    importance for a crop yield prediction model. The chart shows 15 features sorted
+    by importance (highest at top). Colors use a sequential blue gradient - darker
+    blue (#306998) for most important features fading to lighter blue for less important
+    ones. The title reads "bar-permutation-importance · highcharts · pyplots.ai" with
+    a subtitle "Crop Yield Prediction Model - Feature Importance". The y-axis shows
+    "Mean Decrease in Model Score" with tick marks from approximately -0.05 to 0.40.
+    The x-axis displays feature names like "Temperature (C)", "Humidity (%)", "Wind
+    Speed (m/s)", etc. Error bars (whiskers) are visible on each bar showing importance
+    variability. A vertical reference line at x=0 distinguishes positive from negative
+    importance values. Two features (Visibility and Air Quality Index) show slightly
+    negative importance with error bars crossing zero.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, subtitle at 32px, axis labels at 32px, tick labels
+          at 24-26px - all perfectly readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, feature names fully visible on left
+          side with adequate margin (300px)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, error bars clearly visible with appropriate
+          whisker width (4px) and stem width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue sequential gradient, no red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas, adequate margins, minor: could use slightly
+          more vertical spacing between bars'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label "Mean Decrease in Model Score", X-axis
+          labeled "Feature"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present but x-axis tick labels at bottom appear small/compressed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis (categories), importance on x-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal bars sorted by importance,
+          error bars for variability, sequential color gradient, vertical reference
+          line at x=0'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data including negative values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend shown (legend disabled, which is acceptable for single
+          series but loses some context)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-permutation-importance · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 15 features with varying importance including positive AND
+          negative values, demonstrates model-agnostic permutation importance concept
+          well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Crop yield prediction with weather/environmental features is a plausible,
+          neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values range from ~-0.02 to ~0.35 which is realistic for permutation
+          importance scores
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: HTML shows clean chart configuration, no unnecessary complexity
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data appears deterministic but seed handling not visible in HTML
+          output
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Single Highcharts dependency properly embedded inline
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts v12.4.0 API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/letsplot.yaml b/plots/bar-permutation-importance/metadata/letsplot.yaml
index 17a6895564..9215da5f51 100644
--- a/plots/bar-permutation-importance/metadata/letsplot.yaml
+++ b/plots/bar-permutation-importance/metadata/letsplot.yaml
@@ -24,3 +24,182 @@ review:
     - geom_vline with xintercept=0 creates a horizontal line instead of vertical after
     the coordinate flip
   - HTML output could leverage lets-plot interactivity features like tooltips
+  image_description: The plot displays a horizontal bar chart showing permutation
+    feature importance for 15 features from a credit/lending machine learning model.
+    The bars are colored using a gradient from yellow (low importance) to blue (high
+    importance). Income Level has the highest importance (~0.15), followed by Credit
+    Score (~0.12), Employment Years (~0.09), and so on down to Region Code (near zero/slightly
+    negative). Each bar has black error bars showing the standard deviation. A dashed
+    gray vertical reference line is visible at x=0. The y-axis shows feature names
+    (Feature), and the x-axis shows "Mean Decrease in Model Score". The title reads
+    "bar-permutation-importance · letsplot · pyplots.ai". The plot uses a minimal
+    theme with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and tick labels are all readable at full size.
+          Font sizes are well-chosen (24pt title, 20pt axis labels, 16/14pt tick labels).
+          Minor: y-axis labels could be slightly larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere. Feature names are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with good width (0.7) and alpha (0.9). Error
+          bars are distinct with appropriate width and color.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow-to-blue gradient is colorblind-safe, good contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Plot fills canvas well; minor: some whitespace on the right side.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Feature" and "Mean Decrease in Model Score".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. Legend is hidden (guide="none") which
+          is correct since color encodes the same variable as bar length. However,
+          the dashed reference line at x=0 is positioned incorrectly - it appears
+          as a horizontal line across the plot rather than a vertical line at x=0
+          on the importance axis.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for permutation importance.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance mean on x-axis with error bars.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has sorted bars, error bars, color gradient, but the vertical reference
+          line at x=0 is not rendering correctly (appears as horizontal dashed line).
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, shows full range including negative values.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, color gradient is self-explanatory with sorted
+          bars.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-permutation-importance · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 15 features with varying importance levels, includes positive
+          and near-zero/negative values, varying error bar sizes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit/lending scenario is realistic and neutral. Feature names make
+          domain sense.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values (0-0.15) are realistic for permutation importance
+          output.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses path="." which works but non-standard; should just be filename
+          only for cleaner approach.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot ggplot2-style grammar with geoms, themes, scales.
+          Good use of scale_fill_gradient and theme customization. Could use more
+          advanced features like tooltips for interactivity in HTML output.
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/matplotlib.yaml b/plots/bar-permutation-importance/metadata/matplotlib.yaml
index 87ffd55b83..c1a9459d92 100644
--- a/plots/bar-permutation-importance/metadata/matplotlib.yaml
+++ b/plots/bar-permutation-importance/metadata/matplotlib.yaml
@@ -24,3 +24,175 @@ review:
   - Could demonstrate more edge cases like several features with very similar near-zero
     importance
   - Limited use of distinctive matplotlib features beyond standard barh and colorbar
+  image_description: The plot displays a horizontal bar chart showing permutation
+    feature importance for 13 wine-related features. Bars extend horizontally from
+    a vertical reference line at x=0, with the highest importance feature (flavanoids,
+    ~0.142) at the top and the lowest (ash, ~-0.002) at the bottom. The bars use a
+    sequential Blues color gradient mapped to importance values - darker blue for
+    higher importance, lighter for lower. Each bar includes horizontal error bars
+    (gray caps) showing standard deviation. The colorbar on the right indicates the
+    importance scale (0.00-0.14). Title follows the correct format. X-axis shows "Mean
+    Decrease in Accuracy" and Y-axis shows "Feature". Grid lines are subtle with dashed
+    styling at alpha=0.3.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, feature names are spaced well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar height 0.7 is appropriate for 13 features, error bars clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, colorbar placed with appropriate padding
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but x-axis could include "(fraction)" or similar
+          unit
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3), colorbar replaces legend appropriately;
+          grid only on x-axis as intended
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for permutation importance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on Y-axis, importance on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted by importance, error bars, color
+          gradient, reference line at x=0'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range including negative value (ash)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Importance"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "bar-permutation-importance · matplotlib ·
+          pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive and negative importance values, varying magnitudes;
+          could include more features with near-zero importance for better demonstration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wine dataset features (sklearn wine) are realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values 0-0.15 are typical for permutation importance
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses barh, colorbar, ScalarMappable correctly; could leverage ax.errorbar
+          or more advanced matplotlib features
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/plotly.yaml b/plots/bar-permutation-importance/metadata/plotly.yaml
index e10ff0df0b..cdaffce3fe 100644
--- a/plots/bar-permutation-importance/metadata/plotly.yaml
+++ b/plots/bar-permutation-importance/metadata/plotly.yaml
@@ -26,3 +26,176 @@ review:
   - X-axis label could include units or more context (e.g., Mean Decrease in R² Score)
   - Color gradient could use a more standard colormap name for clarity in code
   - Could add annotations for the top features to enhance information density
+  image_description: The plot displays a horizontal bar chart showing permutation
+    feature importance for 15 weather/environmental features. The bars are sorted
+    by importance with "Temperature" at the top (highest importance ~0.245) down to
+    "Time of Day" at the bottom (negative importance ~-0.008). A sequential color
+    gradient transitions from dark teal/blue (low importance) to yellow (high importance).
+    Each bar has horizontal error bars showing standard deviation. A dashed vertical
+    reference line at x=0 distinguishes positive from negative importance values.
+    The title reads "bar-permutation-importance · plotly · pyplots.ai" centered at
+    the top. X-axis is labeled "Mean Decrease in Model Score" and Y-axis is labeled
+    "Feature". The background is clean white with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; feature names are well-spaced on y-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized, error bars visible and proportional
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue-to-yellow gradient is colorblind-safe (viridis-like)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins, plot fills appropriate
+          area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but x-axis lacks units (could specify "score decrease"
+          units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for single-series
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for permutation importance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance values on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has error bars, color gradient, vertical reference line at x=0, sorted
+          by importance
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range including negative values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses `{spec-id} · {library} · pyplots.ai` format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive AND negative importance values, varying error bar
+          sizes; could show more extreme negative values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Weather/environmental ML model is a plausible, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for permutation importance (0-0.25 range typical);
+          some features have nearly identical low values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects (all used)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses `go.Bar` with `error_x`, `add_vline`, custom hover template;
+          could leverage more Plotly-specific features like annotations or subplot
+          capabilities
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/plotnine.yaml b/plots/bar-permutation-importance/metadata/plotnine.yaml
index 392012e0fb..148be40310 100644
--- a/plots/bar-permutation-importance/metadata/plotnine.yaml
+++ b/plots/bar-permutation-importance/metadata/plotnine.yaml
@@ -24,3 +24,175 @@ review:
   - Library features score is modest - could leverage more plotnine-specific features
   - Horizontal grid lines (panel_grid_major_y) show through bars which adds visual
     noise for a horizontal bar chart
+  image_description: The plot displays a horizontal bar chart showing permutation
+    feature importance for a customer churn prediction model. The title "bar-permutation-importance
+    · plotnine · pyplots.ai" appears at the top in bold. Features are listed on the
+    y-axis (Contract Length at top down to Multiple Lines at bottom), sorted by importance.
+    The x-axis shows "Mean Decrease in Model Score" ranging from 0.00 to approximately
+    0.15. Bars are colored with a sequential gradient from yellow (low importance)
+    to blue (high importance). Each bar has horizontal error bars showing variability.
+    A vertical dashed gray reference line at x=0 distinguishes positive from negative
+    importance values. The legend on the right shows the "Importance" color scale.
+    The layout is clean with a minimal theme, subtle grid lines, and well-spaced elements.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold at 24pt, axis labels at 20pt, tick labels at 16pt -
+          all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible, error bars appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow-to-blue gradient is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, legend well-placed, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Feature", "Mean Decrease in Model Score") but
+          no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), but minor y-grid visible when not needed
+          for horizontal bars
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on y-axis, importance on x-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has error bars, color gradient, reference line at x=0, sorted by
+          importance
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including negative value for Multiple Lines
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Importance legend correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: bar-permutation-importance · plotnine · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 15 features with varying importance including one negative
+          value, good range but could show more variation in error bar sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer churn prediction is a real, neutral ML scenario with plausible
+          feature names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values 0-0.15 are realistic for permutation importance
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses plotnine grammar (ggplot, geom_col, coord_flip, scale_fill_gradient,
+          theme_minimal) correctly but doesn't showcase advanced features like faceting
+          or statistical transformations
+  verdict: APPROVED
diff --git a/plots/bar-permutation-importance/metadata/seaborn.yaml b/plots/bar-permutation-importance/metadata/seaborn.yaml
index 038d46496a..a47936fe67 100644
--- a/plots/bar-permutation-importance/metadata/seaborn.yaml
+++ b/plots/bar-permutation-importance/metadata/seaborn.yaml
@@ -24,3 +24,160 @@ review:
     error bar capabilities in barplot
   - Grid uses dashed style which is slightly more visually distracting than solid
     with low alpha
+  image_description: 'The plot is a horizontal bar chart displaying permutation feature
+    importance for 13 wine dataset features. The chart uses a viridis color palette
+    ranging from bright yellow-green (highest importance: proline at ~0.05) to dark
+    teal/green (lower importance features). Features are sorted by importance with
+    "proline" at the top, followed by "color_intensity", "flavanoids", and "alcohol"
+    showing visible bars with importance values. The remaining 9 features (od280/od315_of_diluted_wines
+    through malic_acid) show effectively zero importance with no visible bars. Each
+    bar has horizontal error bars (black with caps) showing variability. There''s
+    a subtle vertical reference line at x=0 and dashed grid lines on the x-axis. The
+    title reads "bar-permutation-importance · seaborn · pyplots.ai" in large font.
+    Axis labels are clear: "Mean Importance (Decrease in Accuracy)" on x-axis and
+    "Feature" on y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all feature names fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars well-sized, error bars visible with good caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though lower features with zero importance create
+          visual imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with context ("Mean Importance (Decrease in Accuracy)")
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for permutation importance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Features on Y-axis, importance on X-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has error bars, sorted by importance, color gradient, reference line
+          at x=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, color mapping is self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: bar-permutation-importance · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in importance but many features at zero creates less
+          interesting visual
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wine dataset is a real, neutral, scikit-learn standard dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Importance values (0-0.06) are realistic for permutation importance
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/altair.yaml b/plots/bar-sorted/metadata/altair.yaml
index c16d166b26..f93fe8f007 100644
--- a/plots/bar-sorted/metadata/altair.yaml
+++ b/plots/bar-sorted/metadata/altair.yaml
@@ -22,3 +22,171 @@ review:
   weaknesses:
   - Grid styling (dashed lines) is slightly more prominent than ideal; solid lines
     with lower opacity might be cleaner
+  image_description: The plot shows a horizontal bar chart with 10 product categories
+    sorted by sales in descending order. The bars are colored in Python Blue (#306998).
+    The y-axis displays product names (Laptop, Smartphone, Tablet, Headphones, Smartwatch,
+    Camera, Keyboard, Monitor, Speaker, Mouse from top to bottom). The x-axis shows
+    "Sales (Units)" ranging from 0 to about 4,800. The title "bar-sorted · altair
+    · pyplots.ai" appears at the top center. A subtle dashed grid (light blue/gray)
+    is visible behind the bars. The bars have rounded right edges. The layout is clean
+    with good use of space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (~28pt), axis labels are well-sized (~22pt),
+          tick labels readable (~18pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Horizontal orientation prevents label overlap, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized, good spacing between categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue), no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good proportions, plot fills canvas well, minor: slight asymmetry
+          in margins'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Sales (Units)" includes units, "Product" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with good opacity (0.3), but no legend needed for
+          single-series; however grid dashes could be less prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y, values on X correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted descending, horizontal orientation for readability (as spec
+          recommends)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "bar-sorted · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows clear ranking with good value distribution, minor: could show
+          more variation in middle values'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales is a realistic business scenario, neutral topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values (720-4850 units) are plausible for product sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Declarative encoding with sort="-x", tooltip encoding, cornerRadiusEnd
+          styling, configure_axis/configure_view, HTML export for interactivity
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/bokeh.yaml b/plots/bar-sorted/metadata/bokeh.yaml
index 187439e20f..57d651379f 100644
--- a/plots/bar-sorted/metadata/bokeh.yaml
+++ b/plots/bar-sorted/metadata/bokeh.yaml
@@ -22,3 +22,175 @@ review:
   - No value labels on bars (spec mentions value labels can improve readability)
   - Does not leverage Bokeh interactive capabilities (hover tooltips would enhance
     the plot)
+  image_description: The plot displays a sorted bar chart with 10 vertical bars arranged
+    in descending order from left to right. The bars are colored in a muted blue (#306998
+    - Python blue) with darker blue outlines. The title "bar-sorted · bokeh · pyplots.ai"
+    appears in the top-left corner. The x-axis is labeled "Product" with category
+    labels (Product A through Product J) rotated at a slight angle. The y-axis is
+    labeled "Sales (Units)" with values ranging from 0 to approximately 450. The bars
+    show a clear descending pattern from Product A (highest ~450) to Product J (lowest
+    ~85). The background is clean white with subtle dashed horizontal grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable. Font sizes are
+          appropriate for the 4800x2700 canvas, though title could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text. X-axis labels are rotated to prevent overlap.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good width (0.7), appropriate spacing, and
+          clear visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with good contrast. No colorblind issues with
+          monochromatic design.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space. Plot fills adequate area, though there's
+          slightly more whitespace on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Product" and "Sales
+          (Units)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Subtle dashed grid lines with alpha 0.3, but no legend present (though
+          not strictly needed for single-color bars).
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart implementation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis, correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Descending sort order, clear visual hierarchy, ranking visualization.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, all data visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series bar chart.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-sorted · bokeh · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows descending sort clearly with good value spread. Could benefit
+          from more varied gaps between values.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales performance by product is a realistic, neutral business scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (85-450 units) are plausible for sales data, though specific
+          product context is generic.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually hardcoded.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh.io, bokeh.models, bokeh.plotting).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png".
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource properly, figure with categorical x_range,
+          vbar method, and Bokeh-specific styling. Could leverage more interactive
+          features or hover tools.
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/highcharts.yaml b/plots/bar-sorted/metadata/highcharts.yaml
index 614d90e969..9fe710f76c 100644
--- a/plots/bar-sorted/metadata/highcharts.yaml
+++ b/plots/bar-sorted/metadata/highcharts.yaml
@@ -25,3 +25,175 @@ review:
     making it harder to estimate intermediate values
   - Data labels show K suffix but the axis title says thousands USD - minor redundancy
     in unit notation
+  image_description: The plot shows a horizontal bar chart with 10 product categories
+    sorted by sales values in ascending order (smallest at top, largest at bottom).
+    The bars are displayed in a consistent blue color (#306998). At the top is the
+    title "bar-sorted · highcharts · pyplots.ai" in bold black text, with the subtitle
+    "Monthly Sales by Product Category" in gray below it. Category labels appear on
+    the left y-axis (Pet Supplies at top with 25K, down to Electronics at bottom with
+    145K). Each bar has a data label showing the value with "K" suffix positioned
+    just to the right of the bar. The layout uses clean white background with no visible
+    gridlines on the x-axis (though the y-axis has dashed grid lines). The chart fills
+    the canvas well with good margins.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, labels at 28px, axis text at 24-32px - all highly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with appropriate spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas, but y-axis (value axis) lacks visible tick labels/gridlines
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales (thousands USD)" with units, but not visible in
+          rendered output (no x-axis gridlines/ticks shown)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle dashed style, legend correctly disabled for single
+          series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart (horizontal)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted order, value labels on bars, horizontal orientation for readability
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible (25K to 145K)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-sorted · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows clear sorting and value variation, but all positive values
+          without dramatic differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales by product category is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values from 25K to 145K are realistic sales figures
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart setup → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no random seed comment needed
+          since no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts bar chart with dataLabels, borderRadius, subtitle,
+          proper options structure, but could leverage more interactive features like
+          tooltip customization
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/letsplot.yaml b/plots/bar-sorted/metadata/letsplot.yaml
index 791998013f..ad47f40913 100644
--- a/plots/bar-sorted/metadata/letsplot.yaml
+++ b/plots/bar-sorted/metadata/letsplot.yaml
@@ -30,3 +30,177 @@ review:
     HTML export
   - 'Grid styling: minor grid is blanked but could keep subtle major grid for value
     reference'
+  image_description: The plot displays a horizontal sorted bar chart showing monthly
+    revenue by product category. The bars are arranged in descending order from top
+    (Electronics at ~$425K) to bottom (Health at ~$62K). All bars use a consistent
+    blue color (#306998). The title "bar-sorted · lets-plot · pyplots.ai" appears
+    at the top in bold. The y-axis is labeled "Product Category" with 10 categories
+    listed, and the x-axis shows "Monthly Revenue" with currency formatting ($0.0K
+    to $400.0K+). The chart uses a minimal theme with subtle dashed horizontal grid
+    lines and clean white background. The layout is well-balanced with the plot utilizing
+    most of the canvas area.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~26pt), axis titles at ~22pt, tick labels
+          at ~18pt - all perfectly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Horizontal orientation prevents any label overlap, all category names
+          clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths (0.7) are well-proportioned for 10 categories, good spacing
+          between bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no color differentiation needed, high contrast
+          against white
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of horizontal
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis label "Product Category" and X-axis "Monthly Revenue" are
+          descriptive but X-axis lacks explicit unit in label (though shown in tick
+          format)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and appropriate alpha, no legend
+          needed (single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis (correct for horizontal bars)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted descending (largest at top), horizontal orientation for readability
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axis range appropriate ($0K-$425K+)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "bar-sorted · lets-plot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear descending sort with good value variation, but could
+          include more dramatic range differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly revenue by product category is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in $62K-$425K range are plausible for monthly category
+          revenue
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → sort → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar (ggplot, geom_bar, coord_flip), theme_minimal,
+          scale_y_continuous with currency formatting, proper categorical ordering
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/matplotlib.yaml b/plots/bar-sorted/metadata/matplotlib.yaml
index 4b8ccf6b9e..8eb9506426 100644
--- a/plots/bar-sorted/metadata/matplotlib.yaml
+++ b/plots/bar-sorted/metadata/matplotlib.yaml
@@ -24,3 +24,170 @@ review:
   - Data scenario is somewhat generic (product categories with random values); could
     use a more specific real-world dataset
   - Does not demonstrate any advanced matplotlib features beyond basic bar chart functionality
+  image_description: The plot shows a horizontal bar chart with 8 product categories
+    sorted in descending order by sales revenue. The largest bar (Storage) is at the
+    top with $764K, followed by Furniture ($585K), Office Supplies ($420K), Printing
+    ($271K), Software ($256K), Electronics ($252K), Accessories ($221K), and Networking
+    ($170K) at the bottom. The bars are colored in a consistent blue shade (#306998).
+    Each bar has a value label positioned to its right. The title reads "bar-sorted
+    · matplotlib · pyplots.ai" at the top. The x-axis shows "Sales Revenue ($ thousands)"
+    and the y-axis shows "Product Category". A subtle dashed grid is visible on the
+    x-axis. The top and right spines are removed for a cleaner look.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, value labels positioned outside bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales Revenue ($ thousands)", "Product
+          Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate (alpha 0.3), but no legend needed;
+          deducting for minor inconsistency in value label positioning
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis, correctly sorted descending
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted descending, horizontal orientation, value labels on bars
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis extends to show all values with labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "bar-sorted · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear hierarchy from largest to smallest, good range of values
+          demonstrating sorting
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Sales by product category is plausible, though generic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $150K-$800K range are realistic for business revenue
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Axes methods (barh, text, spines), but no advanced matplotlib-specific
+          features like annotations with arrows or colorbar
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/plotly.yaml b/plots/bar-sorted/metadata/plotly.yaml
index 201c6d47dc..49ecf87710 100644
--- a/plots/bar-sorted/metadata/plotly.yaml
+++ b/plots/bar-sorted/metadata/plotly.yaml
@@ -23,3 +23,177 @@ review:
   weaknesses:
   - 'Minor: Grid lines could be slightly more subtle (current alpha 0.1 is good but
     gridwidth=1 makes them slightly prominent)'
+  image_description: The plot displays a horizontal sorted bar chart showing "Monthly
+    sales by product category" with 10 product categories arranged in descending order
+    from top to bottom. Electronics leads with 450 (thousands USD), followed by Clothing
+    (380), Home & Garden (320), Sports (280), Books (250), Toys (220), Beauty (195),
+    Food & Grocery (170), Automotive (140), and Office Supplies (95) at the bottom.
+    All bars are rendered in a consistent Python Blue (#306998) color with darker
+    border lines. Value labels appear outside each bar to the right. The title "bar-sorted
+    · plotly · pyplots.ai" is centered at the top. The x-axis shows "Sales (thousands
+    USD)" ranging from 0 to ~500, and the y-axis shows "Product Category". The background
+    uses Plotly's white template with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all perfectly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; horizontal orientation prevents label collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing (bargap=0.25), value labels
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with adequate margins; slight excess whitespace
+          on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Sales (thousands USD)" and "Product Category" are descriptive with
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but no legend needed for single-series
+          data (deducting 0 points - N/A for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis, correctly sorted descending
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted descending, horizontal orientation (spec recommended for long
+          labels), value labels on bars
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range [0, max*1.15] shows all data with room for labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bar-sorted · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear ranking with varied values spanning wide range (95-450),
+          demonstrating the sorting concept well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales by product category is a plausible, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in thousands USD are realistic for retail sales data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses go.Bar with proper configuration, plotly_white template, text
+          positioning, interactive HTML export, proper scaling (width=1600, height=900,
+          scale=3)
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/plotnine.yaml b/plots/bar-sorted/metadata/plotnine.yaml
index ed890cd025..688431095d 100644
--- a/plots/bar-sorted/metadata/plotnine.yaml
+++ b/plots/bar-sorted/metadata/plotnine.yaml
@@ -21,3 +21,172 @@ review:
   weaknesses:
   - Could highlight top category with different color as spec suggests
   - No random seed comment (data is deterministic but convention not followed)
+  image_description: 'The plot displays a sorted bar chart showing "Monthly sales
+    by product category" with 8 vertical bars arranged in descending order from left
+    to right. The bars use a consistent blue color (#306998). Categories from highest
+    to lowest: Electronics (4850), Furniture (3720), Clothing (3150), Food & Beverage
+    (2890), Home & Garden (2340), Sports (1980), Books (1450), and Toys (920). Each
+    bar has a value label above it. The title "bar-sorted · plotnine · pyplots.ai"
+    appears at the top. X-axis labels are rotated 30 degrees for readability. Y-axis
+    shows "Sales ($ thousands)" and X-axis shows "Product Category". Horizontal grid
+    lines are subtle. The overall layout is clean with good use of canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels rotated to prevent overlap, value labels clear above
+          bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths appropriate (0.7), good spacing between bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, minor extra space at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales ($ thousands)", "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but grid only on Y-axis which is good; however
+          grid alpha=0.3 is acceptable
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, correctly sorted descending
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted bars, value labels on bars as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-sorted · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows descending sort clearly, good variation in values, but no highlight
+          of specific categories as spec mentioned
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales by product category is a real business scenario, neutral topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in thousands are realistic for retail sales
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Deterministic data (no random), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_bar, geom_text, pd.Categorical for
+          ordering, theme customization. Good use of plotnine idioms but could use
+          scale_* functions for more distinctive styling
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/pygal.yaml b/plots/bar-sorted/metadata/pygal.yaml
index c0014b3ddf..3000259881 100644
--- a/plots/bar-sorted/metadata/pygal.yaml
+++ b/plots/bar-sorted/metadata/pygal.yaml
@@ -23,3 +23,181 @@ review:
   weaknesses:
   - Missing y-axis title to label the categories dimension
   - Data values could show more variation to better demonstrate sorting benefit
+  image_description: 'The plot displays a horizontal bar chart showing monthly sales
+    by product category, sorted in descending order from top to bottom. The title
+    "bar-sorted · pygal · pyplots.ai" appears at the top. Eight categories are shown:
+    Electronics ($4,850) at the top, followed by Clothing ($3,720), Home & Garden
+    ($2,980), Sports ($2,450), Books ($1,890), Toys ($1,560), Beauty ($1,240), and
+    Food ($980) at the bottom. All bars are rendered in Python Blue (#306998). The
+    x-axis is labeled "Sales (USD)" with tick marks at $0, $1,000, $2,000, $3,000,
+    and $4,000. Value labels are displayed centered within each bar with dollar formatting.
+    Subtle vertical grid lines help with value reading. The layout is clean with good
+    use of whitespace.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, category labels, and value labels are all clearly readable.
+          Font sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere. Category labels on left, values centered
+          in bars, all cleanly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing between them. The visual hierarchy
+          is clear.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) is colorblind-safe. Good contrast
+          against white background.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout overall, but slight margin asymmetry with more empty
+          space on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis labeled "Sales (USD)" with units clearly indicated.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are visible and subtle, but no y-axis title labeling the
+          categories dimension.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart type, appropriate for sorted data with
+          category labels.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly on Y-axis, values on X-axis.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted descending (largest first), horizontal orientation for readability,
+          value labels on bars.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately, from $0 to beyond $4,850.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately since categories are labeled on Y-axis.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bar-sorted · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear ranking with varied values demonstrating the sorting
+          concept well. Could show more extreme value differences.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales by product category is a plausible, neutral business
+          scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for monthly sales, though all values being in
+          similar thousands range is slightly uniform.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → style → chart → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values), no random generation needed.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to both plot.png and plot.html correctly.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses HorizontalBar, custom Style, print_values with value_formatter,
+          truncate_label. Good use of pygal-specific features but could leverage more
+          interactive SVG capabilities.
+  verdict: APPROVED
diff --git a/plots/bar-sorted/metadata/seaborn.yaml b/plots/bar-sorted/metadata/seaborn.yaml
index db634c1f7e..8d1c0f8f4a 100644
--- a/plots/bar-sorted/metadata/seaborn.yaml
+++ b/plots/bar-sorted/metadata/seaborn.yaml
@@ -24,3 +24,169 @@ review:
   - Uses monochromatic palette instead of seaborn built-in color palettes like colorblind
     or Blues which could add visual interest
   - Grid uses dashed linestyle which is slightly more prominent than recommended
+  image_description: The plot shows a horizontal bar chart displaying 10 product categories
+    sorted by sales in descending order. Electronics leads at $450k, followed by Clothing
+    ($380k), Home & Garden ($320k), Sports ($275k), Books ($240k), Toys ($210k), Beauty
+    ($185k), Automotive ($150k), Food & Grocery ($120k), and Pet Supplies ($95k).
+    All bars are a consistent steel blue color (#306998). The title "bar-sorted ·
+    seaborn · pyplots.ai" appears at the top. The y-axis shows "Product Category"
+    and x-axis shows "Sales (thousands $)". Value labels are positioned at the end
+    of each bar. A subtle dashed grid appears on the x-axis with alpha=0.3. The layout
+    is clean with good proportions - the plot fills the canvas well.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; horizontal orientation prevents label collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, clear visual hierarchy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no color distinction issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales (thousands $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha of 0.3 is good and no legend is needed; however, the
+          dashed grid lines could be slightly more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sorted bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted descending, horizontal orientation, value labels present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis extended to 115% to fit value labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-sorted · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows sorted bars with clear ranking; could demonstrate more variation
+          in values to highlight the sorting benefit
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic product sales scenario, neutral business context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values from $95k to $450k are realistic for business sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.barplot correctly with hue parameter for API compatibility,
+          but doesn't leverage seaborn's statistical features or built-in palettes
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/altair.yaml b/plots/bar-stacked-percent/metadata/altair.yaml
index ab23f3758b..63d3d5ade9 100644
--- a/plots/bar-stacked-percent/metadata/altair.yaml
+++ b/plots/bar-stacked-percent/metadata/altair.yaml
@@ -24,3 +24,169 @@ review:
   weaknesses:
   - Y-axis label "Share of Energy Mix" could include explicit unit notation like "(%)"
     even though format shows percentages
+  image_description: 'The plot displays a 100% stacked bar chart showing energy mix
+    composition by country (Brazil, China, Germany, India, USA). Each bar is normalized
+    to 100% and divided into four colored segments: Fossil Fuels (dark blue #306998),
+    Nuclear (yellow #FFD43B), Renewables (green #2ca02c), and Hydro (cyan #17becf).
+    The x-axis shows country names with horizontal labels, the y-axis shows percentage
+    from 0% to 100% labeled "Share of Energy Mix". The title "bar-stacked-percent
+    · altair · pyplots.ai" appears at the top center. A legend on the right identifies
+    each energy source. White strokes separate bar segments. Brazil notably shows
+    ~70% hydro (cyan), while India shows ~72% fossil fuels (blue). Germany shows the
+    highest renewables share (~38%).'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable, title 28pt, axis labels 18-22pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text, horizontal x-axis labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bar segments clearly visible with white stroke separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'colorblind-safe palette: blue, yellow, green, cyan'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: descriptive labels but y-axis could include explicit units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: subtle grid (alpha 0.3), legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct 100% stacked bar chart with stack="normalize"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: category on x-axis, components as stacked segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: distinct colors, clear legend, consistent ordering, tooltips
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: y-axis shows full 0-100% range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend clearly identifies all four energy sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'uses exact format: bar-stacked-percent · altair · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows varied proportions across countries demonstrating different
+          energy profiles
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: real-world energy mix scenario from spec examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic energy mix percentages for each country
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → chart → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic hardcoded data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only altair and pandas
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: declarative encoding types, stack="normalize", alt.Order(), tooltips,
+          alt.Scale(), configure methods
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/bokeh.yaml b/plots/bar-stacked-percent/metadata/bokeh.yaml
index 66937bfd33..e64a5a46cb 100644
--- a/plots/bar-stacked-percent/metadata/bokeh.yaml
+++ b/plots/bar-stacked-percent/metadata/bokeh.yaml
@@ -22,3 +22,161 @@ review:
   - Dual output (PNG + HTML) leveraging Bokeh interactive capabilities
   weaknesses:
   - Blue and cyan colors could be slightly more distinct for better colorblind accessibility
+  image_description: 'The plot displays a 100% stacked bar chart showing smartphone
+    market share by quarter (Q1 2024 through Q1 2025). Five vertical bars represent
+    each quarter, with four colored segments stacked to 100%: Apple (Python blue #306998)
+    at the bottom, Samsung (golden yellow #FFD43B), Xiaomi (cyan #4ECDC4), and Others
+    (gray #95A5A6) at the top. Each segment displays its percentage value (e.g., 28%,
+    23%, 14%, 35% for Q1 2024). The title "bar-stacked-percent · bokeh · pyplots.ai"
+    appears at top-left, with a legend in the top-right corner. X-axis shows "Quarter"
+    and Y-axis shows "Market Share (%)". White borders separate segments. The layout
+    fills the canvas well with balanced proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable at 36pt title, 28pt labels, 22pt ticks,
+          24pt segment labels
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars and segments perfectly sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: distinguishable colors, minor concern with blue/cyan proximity
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive with context ("Quarter", "Market Share (%)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: subtle dashed grid, legend well-placed but could be outside plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct 100% stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: categories on X, percentages on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: percentage labels, consistent ordering, clear legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100%, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: labels match data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows variation across time, different proportions per quarter
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: smartphone market share is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic market share percentages
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic hardcoded data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: modern Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/highcharts.yaml b/plots/bar-stacked-percent/metadata/highcharts.yaml
index 44941c4843..5d0293d7c8 100644
--- a/plots/bar-stacked-percent/metadata/highcharts.yaml
+++ b/plots/bar-stacked-percent/metadata/highcharts.yaml
@@ -23,3 +23,179 @@ review:
   - Y-axis has too many tick marks (every 2%) creating visual clutter; consider using
     10% or 20% intervals
   - Grid could be more subtle with wider spacing for cleaner appearance
+  image_description: 'The plot displays a 100% stacked column chart showing market
+    share distribution across 5 quarters (Q1 2024 through Q1 2025) for 5 companies.
+    Each bar reaches 100% and is divided into colored segments: TechCorp (blue) at
+    the top showing declining share from 35% to 26%, DataFlow (yellow) growing from
+    25% to 32%, CloudPeak (purple) steadily increasing from 20% to 25%, NetBase (cyan)
+    declining slightly from 12% to 10%, and Others (brown) stable at 7-8%. The title
+    "bar-stacked-percent · highcharts · pyplots.ai" appears at the top with a subtitle
+    "Market Share by Quarter". The y-axis shows percentage values from 0% to 100%,
+    and the x-axis shows quarters. A horizontal legend at the bottom identifies all
+    five companies. Each segment displays its percentage value as a white label.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, tick marks, and data labels are all
+          clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; data labels fit within their segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are well-sized and clearly visible; stacked proportions
+          easy to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan, brown)
+          - no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight deduction for dense y-axis tick
+          labels (every 2%)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Market Share (%)" with units, X-axis has "Quarter"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are visible, but the y-axis has excessive tick marks
+          (every 2%), which creates visual clutter
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked column chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, percentage composition on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentage labels within segments, clear legend, consistent component
+          ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, 0-100% range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all five companies
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bar-stacked-percent · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows proportion changes over time, multiple components, varying
+          segment sizes; slight deduction as all segments are similar size (no very
+          small or dominant segments to test edge cases)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share analysis is a real-world scenario matching spec's application
+          examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sum to 100% per category as expected; percentages are realistic
+          for market share
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is hardcoded (deterministic) but no explicit seed comment; minor
+          deduction
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts stacking:percent, tooltips, and data labels; could
+          leverage more interactive features but solid implementation
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/letsplot.yaml b/plots/bar-stacked-percent/metadata/letsplot.yaml
index 7a2d3f3279..847a9fec26 100644
--- a/plots/bar-stacked-percent/metadata/letsplot.yaml
+++ b/plots/bar-stacked-percent/metadata/letsplot.yaml
@@ -23,3 +23,177 @@ review:
   weaknesses:
   - Grid lines could be more subtle (current default may be slightly prominent)
   - Could add percentage labels inside segments for larger portions to enhance readability
+  image_description: 'The plot displays a 100% stacked bar chart showing the energy
+    source mix across six European countries (Germany, France, UK, Spain, Italy, Poland).
+    Each bar reaches 100% and is divided into five colored segments representing different
+    energy sources: Coal (dark gray), Natural Gas (blue), Nuclear (purple), Renewables
+    (green), and Other (light gray). The y-axis shows "Share of Energy Mix" with percentage
+    labels from 0% to 100%. The x-axis shows "Country" with the six country names.
+    A legend on the right side clearly identifies each energy source. The title reads
+    "bar-stacked-percent · letsplot · pyplots.ai" in bold at the top. The chart uses
+    a minimal theme with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~28pt), axis labels are ~22pt, tick labels
+          ~18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, country names well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are clearly visible with good width (0.75) and alpha
+          (0.9)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with distinct colors (gray, blue, purple,
+          green, light gray)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but slight extra whitespace on left margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Country" and "Share of Energy Mix"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Horizontal grid lines are visible and could be more subtle; y-axis
+          percentages are formatted correctly and the vertical grid is removed appropriately
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked bar chart using position="fill"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (countries) on x-axis, components (energy sources) as
+          stacked segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: normalized bars, clear legend, consistent
+          component ordering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-100% range correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all five energy sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-stacked-percent · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows excellent variation: France heavy nuclear, Poland heavy coal,
+          Spain/Germany high renewables'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real-world scenario with plausible European energy mix data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages are realistic and reflect actual energy distribution
+          patterns
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed is needed because the data is hardcoded and therefore
+          deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with position="fill", scale_y_continuous with
+          format, theme_minimal, but could leverage more lets-plot specific interactivity
+          features
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/matplotlib.yaml b/plots/bar-stacked-percent/metadata/matplotlib.yaml
index 61d7cb5537..e9e825cc06 100644
--- a/plots/bar-stacked-percent/metadata/matplotlib.yaml
+++ b/plots/bar-stacked-percent/metadata/matplotlib.yaml
@@ -23,3 +23,179 @@ review:
   - No distinctive matplotlib features used (hatching, custom annotations, or advanced
     styling could enhance)
   - Legend frame adds slight visual weight that could be removed for cleaner look
+  image_description: 'The plot displays a 100% stacked bar chart showing the European
+    energy mix for 6 countries (Germany, France, UK, Spain, Italy, Poland). Each bar
+    is divided into 5 colored segments representing energy sources: Renewables (Python
+    blue #306998), Nuclear (golden yellow #FFD43B), Natural Gas (green #50C878), Coal
+    (crimson red #DC143C), and Other (purple #9370DB). Percentage labels appear within
+    segments that are ≥8%, using white text on dark backgrounds and black text on
+    light backgrounds. The title reads "European Energy Mix · bar-stacked-percent
+    · matplotlib · pyplots.ai" at the top. The legend is positioned outside the plot
+    area to the upper right. A subtle horizontal grid at y=0, 25, 50, 75, 100 aids
+    reading. The country names appear below each bar on the x-axis, with "Country"
+    as the axis label and "Percentage (%)" on the y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, percentage
+          labels at 14pt bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, percentage labels only shown when segment ≥8%
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are well-sized with white edge separators, clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good palette avoiding pure red-green conflict, though the adjacent
+          red and green segments could be slightly improved
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend outside doesn't waste space, good
+          proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Country" and "Percentage (%)" are descriptive; "Percentage (%)"
+          has unit but "Country" is generic'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, legend well-placed but frameon=True adds
+          visual weight
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, components as stacked segments, values normalized
+          to 100%
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentage labels in segments, distinct colors, clear legend, consistent
+          component order
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All bars show 0-100%, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 energy sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "{context} · {spec-id} · {library} · pyplots.ai" format correctly
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows diverse proportions: France dominated by Nuclear (66%), Poland
+          by Coal (68%), Germany more balanced mix, Italy with zero Nuclear - demonstrates
+          the visualization''s ability to show both dominated and mixed compositions'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: European energy mix is a real, comprehensible scenario with plausible
+          country-specific patterns (France nuclear-heavy, Poland coal-dependent)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: TWh values are realistic for European electricity generation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded numpy array), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Standard matplotlib bar stacking, no distinctive features like custom
+          annotations, hatching, or advanced styling
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/plotly.yaml b/plots/bar-stacked-percent/metadata/plotly.yaml
index b8a6077b17..8c941eacd0 100644
--- a/plots/bar-stacked-percent/metadata/plotly.yaml
+++ b/plots/bar-stacked-percent/metadata/plotly.yaml
@@ -21,3 +21,178 @@ review:
   - Colorblind-safe palette with Python blue (#306998) as primary color
   weaknesses:
   - Missing subtle gridlines on y-axis would help readers estimate exact percentages
+  image_description: 'The plot displays a 100% stacked bar chart showing the energy
+    mix across six European countries (Germany, France, UK, Spain, Italy, Poland).
+    Each bar is divided into five colored segments representing different energy sources:
+    Renewables (dark blue #306998), Nuclear (yellow #FFD43B), Natural Gas (teal #45B39D),
+    Coal (coral/red #E74C3C), and Other (purple #9B59B6). All bars are normalized
+    to 100% with percentage labels displayed inside each segment in white text. The
+    title "bar-stacked-percent · plotly · pyplots.ai" is centered at the top. A horizontal
+    legend sits below the title identifying all five energy components. The y-axis
+    is labeled "Energy Share (%)" ranging from 0-100%, and the x-axis is labeled "Country".
+    The chart uses a clean plotly_white template with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 20pt, percentage
+          labels at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fit within segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are well-sized and clearly visible, percentage labels
+          readable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with good contrast between adjacent
+          segments
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Energy Share (%)" with units, X-axis has "Country"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid visible (while clean, subtle gridlines would help read percentages)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, components as stacked segments, values normalized
+          correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has percentage labels, clear legend, distinct colors, consistent
+          component ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-100% range, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all five energy components
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bar-stacked-percent · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions: France dominated by nuclear, Poland
+          by coal, others by renewables/gas mix - excellent diversity'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy mix by European country is a real, comprehensible scenario
+          with plausible proportions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are generally plausible but contain minor inaccuracies (e.g.,
+          France nuclear at 63% vs actual ~70%, Poland coal at ~61% vs actual ~70%)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: Data is hardcoded and deterministic (acceptable for this use case),
+          so no random seed is strictly needed - points deducted because no seed
+          is set, even though no randomization is used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses go.Figure with go.Bar traces, proper barmode="stack", text positioning,
+          plotly_white template, horizontal legend, and HTML export for interactivity
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/plotnine.yaml b/plots/bar-stacked-percent/metadata/plotnine.yaml
index 8ef7db8e22..2010c31ce3 100644
--- a/plots/bar-stacked-percent/metadata/plotnine.yaml
+++ b/plots/bar-stacked-percent/metadata/plotnine.yaml
@@ -22,3 +22,178 @@ review:
   weaknesses:
   - Y-axis scale shows 0.00-1.00 (proportions) while label says Market Share (%) -
     minor inconsistency but not misleading since percentages are labeled in bars
+  image_description: 'The plot displays a 100% stacked bar chart showing smartphone
+    market share by quarter from Q1 2023 to Q2 2024. Six vertical bars represent each
+    quarter, with each bar normalized to 100% (shown as 0.00-1.00 on y-axis). Four
+    segments per bar represent: Apple (Python Blue), Samsung (Python Yellow), Xiaomi
+    (Green), and Others (Gray). Each segment displays a percentage label in white
+    bold text (e.g., 23%, 22%, 12%, 43% for Q1 2023). The y-axis is labeled "Market
+    Share (%)", x-axis shows "Quarter" with clear quarterly labels. A legend on the
+    right identifies all four companies. The title correctly follows the format "bar-stacked-percent
+    · plotnine · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (24pt), axis labels (20pt), tick labels (16pt)
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments well-sized with appropriate width (0.7), percentage
+          labels visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Colors are distinguishable: blue, yellow, green, gray - good contrast
+          and colorblind-safe'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, legend well-positioned
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis says "Market Share (%)" but shows 0.00-1.00 (proportions,
+          not actual percentages)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Vertical grid removed, horizontal grid subtle, legend clear and well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=quarters (categorical), Y=share values normalized, fill=company
+          segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, percentage labels, legend,
+          consistent component ordering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, bars span 0-100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `bar-stacked-percent · plotnine · pyplots.ai`
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows proportional variation across quarters (Xiaomi growing, Samsung
+          declining slightly), demonstrates the comparison use case well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Smartphone market share is a real, comprehensible scenario with plausible
+          company names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values are realistic market shares, but "Others" consistently ~43-46%
+          feels slightly high for a real market analysis
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data (no random generation needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used, well-organized from plotnine
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s grammar of graphics: `ggplot()` + `geom_bar()`
+          with `position_fill()`, `geom_text()` with position matching, `scale_fill_manual()`,
+          and comprehensive `theme()` customization'
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/pygal.yaml b/plots/bar-stacked-percent/metadata/pygal.yaml
index 493c845eaa..22795ccba8 100644
--- a/plots/bar-stacked-percent/metadata/pygal.yaml
+++ b/plots/bar-stacked-percent/metadata/pygal.yaml
@@ -26,3 +26,182 @@ review:
     would better demonstrate the chart type
   - Font sizes in the library rules suggest smaller values (title_font_size=28) but
     implementation uses much larger (60) - while readable, it deviates from guidelines
+  image_description: 'The plot displays a 100% stacked bar chart showing energy mix
+    by country. Six countries are shown on the x-axis (USA, Germany, China, Brazil,
+    Japan, India). Each bar is divided into three colored segments: blue (Fossil Fuels),
+    yellow (Nuclear), and green (Renewable). All bars reach 100% height, with percentage
+    labels displayed within each segment. The title "bar-stacked-percent · pygal ·
+    pyplots.ai" appears at the top. The y-axis shows "Percentage (%)" from 0% to 100%,
+    and the x-axis shows "Country". A legend at the bottom identifies the three energy
+    sources. The chart clearly shows varying energy compositions - Brazil has the
+    highest renewable share (81%), while Japan has the highest fossil fuel dependency
+    (88%).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and percentage values are clearly readable. Font
+          sizes are well-scaled for the canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and values are distinct
+          and readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible. Segment proportions are
+          easy to distinguish.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green provide good contrast. Not pure red-green,
+          but yellow-green distinction could be slightly better.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space; plot fills appropriate area with balanced
+          margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Percentage (%)" with units, X-axis has "Country" - both
+          descriptive.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend at bottom is clear and well-placed. Y-axis guides visible
+          but no subtle grid styling.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked bar chart implementation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, components as stacked segments, values normalized
+          to percentages.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: percentage labels within segments, clear legend,
+          consistent component ordering.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis correctly shows 0-100% range, all data visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Fossil Fuels, Nuclear, and Renewable.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "bar-stacked-percent · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety in proportions across countries. Brazil's high renewable
+          vs Japan's high fossil creates good contrast. Nuclear segment is consistently
+          small, could show more variation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy mix by country is a perfect real-world application. Values
+          are plausible and match general knowledge of energy profiles.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Values are realistic. Minor: percentages already sum to 100 in raw
+          data, so normalization is redundant (though code handles it correctly).'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation needed).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but also "plot.html" (extra file, though acceptable
+          for pygal).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's StackedBar, custom Style, value_formatter, print_values.
+          Could leverage more pygal-specific features like tooltips or interpolation.
+  verdict: APPROVED
diff --git a/plots/bar-stacked-percent/metadata/seaborn.yaml b/plots/bar-stacked-percent/metadata/seaborn.yaml
index 985d7beede..8ce40cb741 100644
--- a/plots/bar-stacked-percent/metadata/seaborn.yaml
+++ b/plots/bar-stacked-percent/metadata/seaborn.yaml
@@ -24,3 +24,171 @@ review:
     used for sns.despine() styling - should leverage seaborn native bar plotting functions
     more
   - Legend placement outside the plot creates additional whitespace on the right side
+  image_description: |-
+    The plot shows a 100% stacked bar chart displaying market share data across 6 quarters (Q1 2023 to Q2 2024) for 4 companies. Each bar reaches exactly 100% and is divided into 4 colored segments:
+    - **Company A** (dark blue, #306998) - at the bottom, declining from 35% to 24%
+    - **Company B** (yellow, #FFD43B) - second from bottom, growing from 25% to 34%
+    - **Company C** (teal, #4ECDC4) - third segment, growing slightly from 22% to 27%
+    - **Company D** (coral/salmon, #E76F51) - top segment, declining from 18% to 15%
+
+    The title reads "bar-stacked-percent · seaborn · pyplots.ai" at the top. X-axis shows "Quarter" with 6 quarter labels, Y-axis shows "Market Share (%)" from 0-100. Percentage labels are displayed inside each segment (white text on dark colors, dark text on yellow). The legend is positioned outside the plot on the upper right. A subtle horizontal grid is visible. The layout is clean and professional.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, percentage labels fit within segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments clearly visible with good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good colorblind-safe palette (blue, yellow, teal, coral), though
+          could use a more established colorblind palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but legend takes extra space on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Market Share (%)", "Quarter"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend placement outside plot causes extra whitespace; grid is appropriately
+          subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 100% stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (quarters) on X, components (companies) stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: percentage labels, clear legend, consistent
+          ordering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-100%, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 companies
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-stacked-percent · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying proportions across time, demonstrates market shift
+          trends
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by quarter is a perfect, realistic use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages in realistic ranges (15-35%), sensible quarterly progression
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: No issues detected
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Only uses sns.despine() from seaborn; the actual plotting is done
+          with matplotlib's ax.bar(). This is a significant weakness as the implementation
+          could leverage seaborn's native bar plotting capabilities more.
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/altair.yaml b/plots/bar-stacked/metadata/altair.yaml
index 4710d92a1e..26c54a5fdd 100644
--- a/plots/bar-stacked/metadata/altair.yaml
+++ b/plots/bar-stacked/metadata/altair.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Legend positioned slightly far from the chart area, creating visual separation
   - Could use Altair selection features for interactive highlighting
+  image_description: 'The plot displays a stacked bar chart showing quarterly sales
+    (Q1-Q4) by product category. Four distinct stacked segments are visible per bar:
+    Electronics (blue, #306998) at the bottom, Clothing (orange, #E69F00), Home &
+    Garden (teal, #009E73), and Sports (yellow, #F0E442) at the top. Total value labels
+    appear above each stack (230, 259, 232, 321). The y-axis shows "Sales (Thousands
+    USD)" ranging from 0-340, and the x-axis shows "Quarter". A clear legend titled
+    "Product Category" is positioned on the right. The title reads "bar-stacked ·
+    altair · pyplots.ai" at the top center. White stroke separators between segments
+    enhance readability. The chart demonstrates clear growth in Q4.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, clean layout with proper spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments well-sized, white stroke separators make segments distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, orange, teal, yellow) with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace on left margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales (Thousands USD)", "Quarter"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid has good opacity (0.3) but legend is slightly far from chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values stacked correctly on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stacked components, total labels, legend,
+          consistent ordering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis extends appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories exactly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bar-stacked · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across quarters, different component sizes, clear
+          Q4 growth trend
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in thousands USD are realistic for quarterly retail
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative encoding with Order channel, tooltips,
+          and layered chart composition; could leverage more advanced features like
+          selections
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/bokeh.yaml b/plots/bar-stacked/metadata/bokeh.yaml
index b29b9ea5c1..2e2b9f4f13 100644
--- a/plots/bar-stacked/metadata/bokeh.yaml
+++ b/plots/bar-stacked/metadata/bokeh.yaml
@@ -24,3 +24,179 @@ review:
   - Legend placement in top-left slightly crowds the upper y-axis tick labels
   - Missing total value labels above each stack (mentioned in spec notes as consideration)
   - Data is hardcoded rather than using random generation with seed
+  image_description: 'The plot displays a stacked bar chart showing quarterly sales
+    data (Q1-Q4) with four product categories stacked vertically: Electronics (dark
+    blue at bottom), Clothing (golden yellow), Home & Garden (teal/turquoise), and
+    Sports (coral/salmon at top). The title "bar-stacked · bokeh · pyplots.ai" appears
+    at the top left. The y-axis shows "Sales (thousands USD)" ranging from 0 to 500,
+    and the x-axis shows "Quarter" with Q1-Q4 labels. A legend in the top-left corner
+    identifies all four categories. The bars show a clear upward trend in total sales
+    across quarters, with Q1 at ~315K, Q2 at ~405K, Q3 at ~430K, and Q4 at ~475K.
+    White separator lines between stacked segments enhance readability. The grid uses
+    subtle dashed horizontal lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 24pt - all perfectly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend is well-positioned and doesn't
+          cover data
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are clearly visible with white borders separating them,
+          good width (0.7)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinct and colorblind-friendly (blue, yellow, teal,
+          coral - no red-green confusion)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, minor issue: legend overlaps with y-axis
+          area slightly'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales (thousands USD)" and "Quarter"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but legend placement in top-left
+          crowds the 500 tick mark area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, stacked components correctly layered
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: distinct colors, clear legend, consistent
+          stacking order'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with 10% padding at top
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All four categories correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "bar-stacked · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across categories and quarters, different growth
+          patterns per segment
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a plausible business scenario,
+          though somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in thousands USD (45-190K) are realistic retail sales figures
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (data is hardcoded/deterministic), but data
+          could benefit from np.random.seed for future extensions
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool with custom tooltips, Legend with
+          LegendItem customization. Good use of Bokeh features but could leverage
+          more (e.g., LabelSet for totals).
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/highcharts.yaml b/plots/bar-stacked/metadata/highcharts.yaml
index 9654444614..ed9227625c 100644
--- a/plots/bar-stacked/metadata/highcharts.yaml
+++ b/plots/bar-stacked/metadata/highcharts.yaml
@@ -24,3 +24,172 @@ review:
   - Legend placement conflicts with X-axis title - they visually overlap
   - Data labels within segments could be slightly larger for better readability at
     full resolution
+  image_description: 'The plot displays a stacked column chart showing monthly energy
+    consumption by source (Jan-Jun). Four energy sources are stacked vertically: Solar
+    (teal/cyan at bottom), Natural Gas (yellow), Nuclear (purple), and Coal (darker
+    blue at top). Each bar shows individual segment values as data labels and stack
+    totals above (ranging from 1,225 to 1,317 MWh). The title correctly shows "bar-stacked
+    · highcharts · pyplots.ai" with a descriptive subtitle. A horizontal legend at
+    the bottom identifies all four components. The Y-axis is labeled "Energy (MWh)"
+    and X-axis shows "Month".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend are clearly readable; data labels
+          within segments are slightly small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are well-sized with appropriate spacing and white borders
+          between segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette using blue, yellow, purple, and cyan - no
+          red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with adequate margins; slight excess whitespace at bottom
+          below legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Energy (MWh)" with units, X-axis has "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend overlaps with X-axis title "Month" causing visual conflict
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked column chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values stacked correctly on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Stacking, multiple components, total labels all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis scales appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four energy sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied trajectories: Solar growth, Coal decline, Nuclear steady,
+          Natural Gas fluctuation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Energy consumption by source is highly realistic; values are plausible
+          for regional power mix
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: MWh values in realistic range for monthly consumption
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → series → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (minor issue, acceptable)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts stacking, tooltips with stack totals, stack labels,
+          but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/letsplot.yaml b/plots/bar-stacked/metadata/letsplot.yaml
index f2875c1461..d36e783205 100644
--- a/plots/bar-stacked/metadata/letsplot.yaml
+++ b/plots/bar-stacked/metadata/letsplot.yaml
@@ -24,3 +24,180 @@ review:
     demonstrate part-to-whole analysis
   - Legend stacking order visually shows bottom-to-top but legend reads top-to-bottom
     (minor visual disconnect)
+  image_description: 'The plot displays a stacked bar chart with 4 quarters (Q1, Q2,
+    Q3, Q4) on the x-axis and "Sales (Thousands $)" on the y-axis ranging from 0 to
+    450. Four product categories are stacked within each bar: Electronics (teal blue,
+    #306998) at the bottom, Furniture (golden yellow, #FFD43B), Clothing (purple,
+    #8B5CF6), and Accessories (orange, #F59E0B) at the top. Bold total labels appear
+    above each stack: 315, 377, 386, 430 showing growth trend. A clean legend box
+    on the right identifies "Product Category" with all four items. The title "bar-stacked
+    · letsplot · pyplots.ai" is bold at the top. Minimal theme with no vertical gridlines
+    and subtle horizontal gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~28pt), axis titles ~22pt, tick labels ~18pt,
+          all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate width (0.7), alpha 0.9 provides
+          good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between blue, yellow,
+          purple, and orange
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend is appropriately placed on right with
+          styled background
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Quarter" and "Sales (Thousands $)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle, but the legend reads top-to-bottom while the bars
+          stack bottom-to-top (Electronics is listed first in the legend yet sits
+          at the bottom of the stack, and Accessories is listed last yet sits on
+          top) - a minor visual disconnect
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Quarters on X, Sales on Y, Products as stacked components
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors, legend, total labels above stacks, consistent
+          component order
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data from 0 to above max total
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"bar-stacked · letsplot · pyplots.ai" is correct'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows quarterly trends with 4 product categories, demonstrates part-to-whole
+          relationships well, but all categories show similar relative proportions
+          across quarters
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands are reasonable for sales data, though the range
+          could show more variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and lets_plot are imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also saves plot.html (not an issue, but path
+          parameter usage is slightly unusual)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses layer_tooltips for interactivity, ggplot grammar, theme customization,
+          but could leverage more lets-plot specific features like livemap or sampling
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/matplotlib.yaml b/plots/bar-stacked/metadata/matplotlib.yaml
index 788e25daea..0c2d2748b2 100644
--- a/plots/bar-stacked/metadata/matplotlib.yaml
+++ b/plots/bar-stacked/metadata/matplotlib.yaml
@@ -26,3 +26,178 @@ review:
     grow) to better demonstrate stacked bar utility for composition analysis
   - Could leverage matplotlib hatching patterns to further distinguish segments for
     accessibility
+  image_description: 'The plot displays a stacked bar chart showing quarterly revenue
+    (Q1-Q4) for a business, with four product categories stacked: Software (dark blue
+    at bottom), Hardware (yellow), Services (teal), and Support (coral/salmon at top).
+    Each quarter''s bar shows the composition of revenue from these four sources,
+    with total values labeled above each stack ($120M, $129M, $141M, $167M). The y-axis
+    displays "Revenue (Millions USD)" ranging from 0 to 175, and the x-axis shows
+    "Quarter" with Q1-Q4 labels. A legend is positioned outside the plot area in the
+    upper right corner. The chart uses subtle horizontal grid lines and has clean
+    styling with top and right spines removed.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, legend at
+          16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels, legend, and data are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths are appropriate, segments clearly distinguishable with
+          white edge lines
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between adjacent segments
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins, legend positioned cleanly
+          outside
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Revenue (Millions USD)",
+          X-axis has "Quarter"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend is outside the plot requiring
+          bbox_inches='tight' - minor deduction for legend placement being slightly
+          far from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, stacked components correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, clear legend, total
+          labels above stacks, consistent ordering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with appropriate headroom for labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series exactly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bar-stacked · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 quarters with 4 components, demonstrates growth pattern and
+          composition changes, but all components show similar growth trends (could
+          show more variation like one declining while others grow)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product category is a highly realistic and comprehensible
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values in millions USD are realistic for a mid-size company
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Axes methods (ax.bar with bottom parameter), spine customization,
+          and tick_params, but doesn't leverage advanced matplotlib features like
+          hatching patterns or annotations with arrows
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/plotly.yaml b/plots/bar-stacked/metadata/plotly.yaml
index 8b991f986f..f390253421 100644
--- a/plots/bar-stacked/metadata/plotly.yaml
+++ b/plots/bar-stacked/metadata/plotly.yaml
@@ -23,3 +23,159 @@ review:
   - Legend horizontal positioning at top takes vertical space; could use right-side
     vertical legend
   - Yellow and coral colors may be slightly challenging for some colorblind users
+  image_description: 'The plot displays a stacked bar chart showing quarterly revenue
+    (Q1-Q4 2024) for four product categories: Software (dark blue at bottom), Hardware
+    (yellow), Services (teal), and Support (coral at top). Each bar segment contains
+    its value (e.g., 120, 80, 45, 25 for Q1). Total revenue annotations appear above
+    each stack ($270K, $305K, $350K, $390K) showing clear growth. The title "bar-stacked
+    · plotly · pyplots.ai" is centered at the top with a horizontal legend beneath
+    it. Y-axis shows "Revenue (Thousands USD)" and x-axis shows "Quarter". Clean white
+    background with subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bar segments well-sized with clear data labels
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: distinct colors, minor yellow/coral concern for colorblind
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent proportions and margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive with units "Revenue (Thousands USD)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: grid subtle, horizontal legend functional but takes space
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: categories on x-axis, components stacked correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: distinct colors, legend, total labels, consistent ordering, spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: labels match data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct {spec-id} · {library} · pyplots.ai format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows growth pattern, varying components, part-to-whole relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: plausible quarterly revenue by product category
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic business revenue values in thousands USD
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: deterministic data, no random seed needed but could be explicit
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: modern Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/plotnine.yaml b/plots/bar-stacked/metadata/plotnine.yaml
index 57cad15e88..7965a542bf 100644
--- a/plots/bar-stacked/metadata/plotnine.yaml
+++ b/plots/bar-stacked/metadata/plotnine.yaml
@@ -24,3 +24,172 @@ review:
   weaknesses:
   - Grid styling uses alpha parameter which may not render consistently
   - Legend title Product Category is inconsistent with column name Category
+  image_description: 'The plot displays a stacked bar chart with 4 bars representing
+    Q1-Q4 on the x-axis. Each bar contains 4 stacked segments representing product
+    categories: Electronics (pink/magenta at bottom), Clothing (blue/purple), Home
+    (orange), and Sports (green/teal at top). White value labels are centered within
+    each segment showing individual sales values. The y-axis shows "Sales (thousands
+    USD)" ranging from 0 to ~175. The title "bar-stacked · plotnine · pyplots.ai"
+    is displayed at the top in bold. A legend on the right identifies each category.
+    The bars show progression with Q4 having the tallest total stack.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis labels 20pt, tick labels 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly visible within their segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized, segments clearly distinguishable with good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Set2 palette is colorblind-safe with good contrast between categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, minor extra whitespace
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Sales (thousands USD)", X-axis "Quarter" is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle but legend title says "Product Category" while data
+          uses just "Category"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values stacked by component correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has stacked components, segment labels, legend, proper stacking order
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of cumulative totals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "bar-stacked · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows stacking, part-to-whole, variation across quarters. Could show
+          more dramatic differences between categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales by product category is a realistic, comprehensible
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in thousands USD are realistic for retail
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded) but no explicit seed comment; minor
+          deduction
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_bar, position_stack, scale_fill_brewer,
+          theme_minimal. Good use of plotnine idioms but nothing exceptionally distinctive
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/pygal.yaml b/plots/bar-stacked/metadata/pygal.yaml
index f3d689333a..9c0683a01a 100644
--- a/plots/bar-stacked/metadata/pygal.yaml
+++ b/plots/bar-stacked/metadata/pygal.yaml
@@ -22,3 +22,176 @@ review:
   - Grid lines are minimal (only one visible at y=50); adding more y-axis guides would
     improve readability
   - Could use pygal's tooltips or other interactive features more prominently
+  image_description: 'The plot displays a stacked bar chart showing quarterly revenue
+    by product category. Four bars (Q1-Q4) are shown on the x-axis with "Revenue (Million
+    USD)" on the y-axis ranging from 0 to ~55. Each bar consists of four stacked segments
+    in distinct colors: Software (steel blue, bottom), Hardware (golden yellow), Services
+    (teal/cyan), and Cloud (coral/salmon, top). Value labels (e.g., 12.5, 8.3, 5.2,
+    3.1) are displayed within each segment. The title "bar-stacked · pygal · pyplots.ai"
+    appears at the top. A legend at the bottom identifies all four categories. The
+    chart demonstrates clear quarterly growth with Q4 showing the highest cumulative
+    revenue (~56M). A subtle dotted grid line is visible at y=50.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and value labels are all readable. Font sizes
+          are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; value labels fit within segments.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are well-sized and clearly visible with good spacing.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color contrast; blue, yellow, teal, and coral are distinguishable.
+          Not perfectly optimized for colorblindness but acceptable.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with proper margins; legend at bottom is well-positioned
+          but there is some empty space.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Quarter" and "Revenue
+          (Million USD)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Only one subtle grid line visible at y=50, missing intermediate grid
+          lines for easier value reading. Legend is well-placed.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart type.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values stacked correctly.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors, legend, value labels on segments as spec suggests.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis accommodates the tallest stack.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match the data series correctly.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bar-stacked · pygal · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows stacking well, demonstrates part-to-whole relationships. Could
+          show more variation in component ordering.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly revenue by product category is a realistic, comprehensible
+          business scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in millions are realistic for a mid-sized company;
+          growth pattern is plausible.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data, no randomness.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves to plot.png and plot.html correctly.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's custom Style, StackedBar chart type, print_values feature,
+          and legend_at_bottom. Good use of pygal-specific configuration but nothing
+          highly distinctive.
+  verdict: APPROVED
diff --git a/plots/bar-stacked/metadata/seaborn.yaml b/plots/bar-stacked/metadata/seaborn.yaml
index d31f1840e5..4fc36cb768 100644
--- a/plots/bar-stacked/metadata/seaborn.yaml
+++ b/plots/bar-stacked/metadata/seaborn.yaml
@@ -25,3 +25,174 @@ review:
     np.random.seed(42) for consistency)
   - 'Two blue shades (Electronics #306998 and Home & Garden #4B8BBE) may be difficult
     to distinguish for some colorblind users'
+  image_description: 'The plot shows a stacked bar chart displaying monthly sales
+    data (January through June) for four product categories: Electronics (dark blue),
+    Clothing (yellow), Home & Garden (light blue), and Sports (coral/salmon). Each
+    bar is properly stacked with total values labeled above ($285K to $435K). The
+    title follows the required format "bar-stacked · seaborn · pyplots.ai". The y-axis
+    shows "Sales (Thousands $)" ranging from 0 to 500, and the x-axis shows "Month".
+    A legend titled "Product Category" is positioned to the right of the plot. The
+    grid is subtle with horizontal dashed lines, and the overall layout is clean with
+    good spacing between bars.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments well-sized with white edge lines for separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinct (blue, yellow, light blue, coral) but two blue
+          shades could be challenging for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas appropriately, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "Sales (Thousands $)", X-axis just "Month" (acceptable
+          but could be more descriptive)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3, dashed), but legend appears to overlap
+          with the rightmost bar edge slightly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values stacked correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Stacked components, total labels, legend, consistent ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis extends to accommodate totals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match product categories correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: bar-stacked · seaborn · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across months, different growth patterns per category,
+          part-to-whole relationships clear
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales by product category is a real business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in thousands are realistic for retail business
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (data is deterministic, but best practice to include
+          seed for any random operations)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, pandas, seaborn imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.histplot with weights/multiple="stack" which is seaborn's
+          approach, plus sns.set_style, sns.set_context, and sns.despine. However,
+          seaborn doesn't have a native stacked bar function, so using histplot with
+          weights is a workaround rather than a distinctive strength.
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/altair.yaml b/plots/bland-altman-basic/metadata/altair.yaml
index 2db64587b5..133d8d4c0b 100644
--- a/plots/bland-altman-basic/metadata/altair.yaml
+++ b/plots/bland-altman-basic/metadata/altair.yaml
@@ -25,3 +25,173 @@ review:
   - Grid configuration uses gridOpacity=0.3 which is acceptable but could have legend
     explaining the reference lines for publication quality
   - No formal legend element present, though annotations compensate for this
+  image_description: The plot displays a Bland-Altman agreement plot comparing two
+    blood pressure measurement methods. Blue circular scatter points (#306998 Python
+    blue) with moderate transparency (alpha 0.7) represent 80 paired observations.
+    A solid dark blue horizontal line at y = -2.44 indicates the mean bias. Two dashed
+    yellow/gold lines at y = +6.55 and y = -11.44 mark the 95% limits of agreement
+    (±1.96 SD). The x-axis shows "Mean of Two Methods (mmHg)" ranging from approximately
+    80-160, and the y-axis shows "Difference (Method 1 - Method 2) (mmHg)" ranging
+    from about -18 to 10. The title "bland-altman-basic · altair · pyplots.ai" appears
+    at the top. Annotations for each reference line appear on the right side of the
+    plot. The grid is subtle with light gray lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or data elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Marker size (200) appropriate for 80 points; good transparency reveals
+          overlapping points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe; good differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units (mmHg)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is good (alpha 0.3), but no legend present (not required for
+          this plot type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman plot structure
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = mean of pairs, Y = difference; correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean bias line, ±1.96 SD lines, annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points and reference lines
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type; annotations serve as legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bland-altman-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows appropriate spread with bias; could show slight proportional
+          bias trend more clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure measurements from two sphygmomanometers is a realistic
+          medical validation scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Blood pressure values (80-160 mmHg) are realistic; bias of -2.44
+          mmHg is clinically meaningful
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used; all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layering system, tooltips for interactivity, encoding
+          types; could leverage more declarative features
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/bokeh.yaml b/plots/bland-altman-basic/metadata/bokeh.yaml
index ed1c6f2f1a..062fcaf929 100644
--- a/plots/bland-altman-basic/metadata/bokeh.yaml
+++ b/plots/bland-altman-basic/metadata/bokeh.yaml
@@ -25,3 +25,180 @@ review:
     (consider top-right or outside plot)
   - Missing HoverTool which would enhance interactivity and is a key Bokeh strength
   - Grid styling is good but legend could have cleaner integration
+  image_description: 'The plot displays a Bland-Altman agreement plot with 80 blue
+    scatter points showing paired blood pressure measurements. Three horizontal reference
+    lines are present: a solid blue line at the mean bias (-2.82 mmHg), and two dashed
+    yellow lines at the upper (+13.34 mmHg) and lower (-18.99 mmHg) limits of agreement
+    (±1.96 SD). Each reference line has an annotation on the left side showing its
+    value. The title "bland-altman-basic · bokeh · pyplots.ai" appears at the top.
+    The x-axis is labeled "Mean of Two Methods (mmHg)" and the y-axis "Difference
+    (Method 1 - Method 2) (mmHg)". A legend labeled "Observations" is in the top left.
+    The background is white with subtle dashed grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and annotations are all clearly
+          readable at the 4800x2700 resolution with appropriate font sizes (28pt title,
+          22pt axis labels, 18pt ticks, 20pt annotations)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; annotations are well-positioned with
+          appropriate offsets
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter points are well-sized (size=18) with good alpha (0.7) for
+          the 80 data points; slightly smaller than ideal for this density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot fills most of the area but has slightly
+          more whitespace on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units (mmHg)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3) but legend placement partially obscures
+          upper-left data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman plot showing difference vs. mean
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows mean of two methods, Y-axis shows difference (method1
+          - method2)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: mean bias line, ±1.96 SD limits,
+          annotations with values, transparent scatter points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels observations
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bland-altman-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good spread of data with variation, a few outliers beyond limits
+          of agreement; could show slightly more extreme cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure measurements from two sphygmomanometers is a classic,
+          realistic Bland-Altman application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Blood pressure values (100-160 mmHg systolic range) are realistic;
+          difference values are plausible but the bias and SD seem slightly high for
+          typical device comparisons
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Span for reference lines, and Label for annotations;
+          also outputs HTML for interactivity. Could leverage more Bokeh-specific
+          features like HoverTool for data inspection.
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/highcharts.yaml b/plots/bland-altman-basic/metadata/highcharts.yaml
index 2912248b2b..35351742e1 100644
--- a/plots/bland-altman-basic/metadata/highcharts.yaml
+++ b/plots/bland-altman-basic/metadata/highcharts.yaml
@@ -27,3 +27,177 @@ review:
     individual measurement details
   - Red color for limits of agreement could be replaced with a more colorblind-safe
     alternative
+  image_description: The plot displays a Bland-Altman agreement visualization with
+    80 blue scatter points representing paired blood pressure observations. The x-axis
+    shows "Mean of Two Methods (mmHg)" ranging from approximately 96 to 164, and the
+    y-axis shows "Difference (Method 1 - Method 2) (mmHg)" ranging from about -25
+    to 16. A solid blue horizontal line marks the mean difference at -2.67, and two
+    red dashed horizontal lines indicate the ±1.96 SD limits of agreement at approximately
+    +9.49 and -14.84. Each reference line is labeled with its value on the right side
+    of the plot. The title "bland-altman-basic · highcharts · pyplots.ai" appears
+    at the top, and a legend showing "Paired Observations" is positioned in the upper
+    right corner. Points have moderate transparency (alpha ~0.6) allowing overlapping
+    observations to be distinguished.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title is large and bold, axis labels
+          are appropriately sized, tick labels are legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized (radius 14) with good transparency
+          for 80 points; slightly larger than optimal for this density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/red scheme is distinguishable but red for limits is not the
+          most colorblind-safe choice; better than red-green but not optimal
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units (mmHg)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is extremely subtle (alpha 0.1), legend is functional but positioned
+          far from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows mean of pairs, Y-axis shows difference
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: mean difference line, ±1.96 SD limits,
+          annotations with values'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels the scatter series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: bland-altman-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Data shows full range of agreement/disagreement with points both
+          within and outside limits of agreement
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure measurement comparison is a classic Bland-Altman use
+          case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for systolic blood pressure (90-160 mmHg range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic scatter series and plotLines; does not leverage Highcharts
+          interactivity, tooltips, or other distinctive features in a meaningful way
+          beyond what other libraries offer
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/letsplot.yaml b/plots/bland-altman-basic/metadata/letsplot.yaml
index 969f061b17..9d5f407433 100644
--- a/plots/bland-altman-basic/metadata/letsplot.yaml
+++ b/plots/bland-altman-basic/metadata/letsplot.yaml
@@ -26,3 +26,179 @@ review:
     at full resolution
   - Could add tooltips for interactivity since letsplot supports it
   - Grid styling uses hardcoded color instead of leveraging theme defaults
+  image_description: The plot displays a Bland-Altman agreement analysis comparing
+    two blood pressure measurement methods. The visualization shows 80 blue scatter
+    points with moderate transparency (alpha=0.7) plotted against a white background
+    with subtle gray grid lines. The x-axis shows "Mean of Two Methods (mmHg)" ranging
+    from approximately 90 to 155 mmHg, and the y-axis shows "Difference (Method 1
+    - Method 2) (mmHg)" ranging from about -20 to +12. A solid green horizontal line
+    indicates the mean bias at -2.55 mmHg, while red dashed horizontal lines mark
+    the limits of agreement at +7.84 mmHg (upper) and -12.93 mmHg (lower). Each reference
+    line has a clearly visible boxed label positioned on the left side with color-matched
+    text (green for bias, red for LoA). The title "bland-altman-basic · letsplot ·
+    pyplots.ai" appears at the top. The layout is clean and well-proportioned with
+    good use of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and annotations are all clearly readable
+          at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; annotation labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are appropriately sized (size=5) with good alpha (0.7) for
+          80 data points; minor deduction as points could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/green/red scheme is colorblind-friendly; colors serve distinct
+          purposes (data/bias/LoA)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization but slight asymmetry with more whitespace
+          on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Mean of Two Methods (mmHg)" and
+          "Difference (Method 1 - Method 2) (mmHg)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed for this plot type, grid is subtle and appropriate
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman plot with scatter points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = mean of paired observations, Y = difference (correct)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean bias line, ±1.96 SD limits of agreement, annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type; line annotations serve as legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bland-altman-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good spread of differences around the mean, includes points
+          near and beyond LoA boundaries; minor deduction as no extreme outliers highlighted
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Comparing blood pressure readings from two sphygmomanometers is a perfect
+          real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: BP values (100-160 mmHg) are realistic; the negative bias (-2.55)
+          with ~5 mmHg SD is plausible, but the direction of the slight bias could be reconsidered
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and lets_plot imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot ggplot grammar effectively with geom_point, geom_hline,
+          geom_label, theme_minimal; could leverage more advanced features like interactive
+          tooltips
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/matplotlib.yaml b/plots/bland-altman-basic/metadata/matplotlib.yaml
index 5f0d32b9d1..ee1be7c570 100644
--- a/plots/bland-altman-basic/metadata/matplotlib.yaml
+++ b/plots/bland-altman-basic/metadata/matplotlib.yaml
@@ -24,3 +24,178 @@ review:
   - Legend and right-side annotations show redundant information; consider removing
     one or making them complementary
   - Marker size (s=150) slightly below the recommended 100-200 range for 80 data points
+  image_description: 'The plot displays a Bland-Altman agreement plot comparing two
+    blood pressure measurement methods. Blue circular markers (80 data points) are
+    scattered showing the difference between methods plotted against the mean of both
+    methods. A solid dark blue horizontal line indicates the mean bias at -2.44 mmHg.
+    Two dashed golden/yellow horizontal lines mark the limits of agreement at +1.96
+    SD (6.55 mmHg) and -1.96 SD (-11.44 mmHg). A subtle dotted gray reference line
+    is at y=0. The right side of the plot has text annotations showing the bias and
+    limits of agreement values. The x-axis is labeled "Mean of Two Methods (mmHg)"
+    ranging from 80-150, and the y-axis is labeled "Difference (Method 1 - Method
+    2) (mmHg)" ranging from -15 to about 7. The title follows the required format:
+    "bland-altman-basic · matplotlib · pyplots.ai". A legend in the upper left explains
+    the three horizontal lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, annotations
+          at 14pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all annotations positioned cleanly
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at s=150 with alpha=0.6 appropriate for 80 points, though
+          could be slightly larger per guidelines (100-200 recommended for 30-100
+          points)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout, though right margin annotations extend beyond
+          typical plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units (mmHg)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed but provides redundant information (same values
+          shown in annotations); grid is subtle at alpha=0.3
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows mean of pairs, Y-axis shows difference
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean line, ±1.96 SD lines, annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes line meanings
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bland-altman-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good spread of differences with points both within and outside
+          limits of agreement; demonstrates realistic bias
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure sphygmomanometer comparison is a classic real-world
+          application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Blood pressure values around 80-150 mmHg are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses basic matplotlib features (axhline, scatter, text annotations)
+          correctly but doesn't leverage advanced features like fill_between for confidence
+          regions
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/plotly.yaml b/plots/bland-altman-basic/metadata/plotly.yaml
index f9696388a9..aeee8da087 100644
--- a/plots/bland-altman-basic/metadata/plotly.yaml
+++ b/plots/bland-altman-basic/metadata/plotly.yaml
@@ -24,3 +24,174 @@ review:
   weaknesses:
   - Data could include a few clear outliers beyond the limits of agreement to demonstrate
     how such cases appear on the plot
+  image_description: 'The plot displays a Bland-Altman agreement plot on a white background
+    with the title "bland-altman-basic · plotly · pyplots.ai" at the top center. The
+    x-axis shows "Mean of Two Methods (mmHg)" ranging from approximately 85 to 155,
+    and the y-axis shows "Difference (Method 1 − Method 2) (mmHg)" ranging from -15
+    to about 8. There are 80 blue circular markers (color #306998) with white edges
+    and moderate transparency scattered across the plot. A solid blue horizontal line
+    indicates the mean difference at -2.44, and two dashed gold/yellow horizontal
+    lines mark the limits of agreement at +1.96 SD (6.55) and -1.96 SD (-11.44). Annotations
+    on the right side clearly label these reference lines with their values. The grid
+    is subtle with light gray lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; annotations positioned cleanly on the
+          right
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size 14 with 0.7 opacity is appropriate for 80 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and gold/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (mmHg)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid at 0.1 alpha is appropriate, but legend is hidden when it could
+          help identify the observations trace
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman difference plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = mean of pairs, Y = difference - exactly per spec
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean line, ±1.96 SD limits, annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (legend disabled appropriately)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "bland-altman-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bias and spread well, but data is relatively symmetric around
+          mean without clear outliers beyond LOA to demonstrate boundary cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure measurement comparison between sphygmomanometers is
+          a classic Bland-Altman use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: BP values 85-155 mmHg are realistic systolic readings; bias of ~2.5
+          mmHg is clinically plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of `add_hline` with annotations, hover templates for interactivity,
+          and HTML export for interactive viewing
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/pygal.yaml b/plots/bland-altman-basic/metadata/pygal.yaml
index f41a15a9f9..1376e68086 100644
--- a/plots/bland-altman-basic/metadata/pygal.yaml
+++ b/plots/bland-altman-basic/metadata/pygal.yaml
@@ -22,3 +22,161 @@ review:
   weaknesses:
   - LoA values annotated only in legend, not directly on the plot as text annotations
     (spec requests annotations on the plot)
+  image_description: 'The plot displays a Bland-Altman agreement analysis comparing
+    two blood pressure measurement methods (sphygmomanometers). Blue scatter points
+    (50 subjects) show the difference vs. mean of paired measurements. The x-axis
+    shows "Mean of Two Methods (mmHg)" ranging from ~100-155 mmHg. The y-axis shows
+    "Difference (Method 1 - Method 2) (mmHg)" ranging from -24 to +24. A solid red
+    horizontal line marks the mean bias (-1.7 mmHg), with dashed green (upper: +14.6)
+    and purple (lower: -17.9) lines showing the ±1.96 SD limits of agreement. The
+    white background has subtle grid lines, and a clean legend at the bottom identifies
+    all four series with their values.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: all text readable, title and labels clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: markers well-sized for 50 points, good opacity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: distinct colors (blue, red, green, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive with units "(mmHg)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle grid, well-placed bottom legend
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct Bland-Altman XY scatter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: mean on x-axis, difference on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: has mean bias and LoA lines; values in legend but not annotated directly
+          on plot
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly labeled with values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correct "bland-altman-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: good spread, includes outliers near LoA bounds
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: blood pressure comparison is authentic medical use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic systolic BP values (100-155 mmHg)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean linear flow, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bland-altman-basic/metadata/seaborn.yaml b/plots/bland-altman-basic/metadata/seaborn.yaml
index 2189c825aa..f2b96d52d5 100644
--- a/plots/bland-altman-basic/metadata/seaborn.yaml
+++ b/plots/bland-altman-basic/metadata/seaborn.yaml
@@ -26,3 +26,177 @@ review:
     framealpha competes with data region)'
   - 'Library features (LF-01): Could leverage more seaborn-specific features like
     rugplot for marginal distributions or confidence intervals'
+  image_description: 'The plot displays a Bland-Altman agreement plot comparing two
+    blood pressure measurement methods. Blue circular markers (n=80) with white edges
+    and moderate transparency show individual paired observations. The x-axis displays
+    "Mean of Two Methods (mmHg)" ranging from approximately 80-150 mmHg, and the y-axis
+    shows "Difference (Method 1 - Method 2) (mmHg)" ranging from about -15 to +7 mmHg.
+    A solid yellow horizontal line indicates the mean difference (bias) at -1.9 mmHg.
+    Two red dashed horizontal lines mark the 95% limits of agreement at +7.1 and -10.9
+    mmHg. A subtle gray dotted zero reference line is included. Numeric annotations
+    appear on the right edge of the plot showing the exact values. The legend in the
+    upper left displays the mean and LoA values with units. The title follows the
+    required format: "bland-altman-basic · seaborn · pyplots.ai". The layout is well-balanced
+    with a subtle gray dashed grid.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized (s=150) with good alpha (0.7) for 80 points; slight
+          deduction for some edge crowding
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/red color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (mmHg)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha at 0.3 is acceptable, but zero reference line could be
+          more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Bland-Altman plot with differences vs means
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows mean of pair, Y-axis shows difference
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean line, LoA lines, annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe lines with values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: bland-altman-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in differences across the measurement range; slight
+          proportional error visible; one minor deduction for not showing more extreme
+          outliers outside LoA
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure measurements from two sphygmomanometers is a classic
+          real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values realistic (80-150 mmHg systolic BP range), though distribution
+          could be slightly tighter
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot correctly, but the horizontal reference lines
+          use matplotlib's axhline rather than seaborn-specific statistical annotation
+          features
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/altair.yaml b/plots/box-basic/metadata/altair.yaml
index b53dafd946..7cdd1fd7ff 100644
--- a/plots/box-basic/metadata/altair.yaml
+++ b/plots/box-basic/metadata/altair.yaml
@@ -25,3 +25,170 @@ review:
   - Does not include tooltips which are a key Altair feature for data exploration
   - Legend is explicitly hidden (legend=None) when it could aid readability
   - Grid opacity configured but not showing strongly in the visualization
+  image_description: 'The plot displays 5 box plots comparing salary distributions
+    across departments (Engineering, Finance, HR, Marketing, Sales). Each department
+    has a distinct color: blue for Engineering, yellow for Finance, light blue for
+    HR, gray for Marketing, and green for Sales. The boxes show quartiles with white
+    median lines, whiskers extend to show data range, and outliers appear as hollow
+    circles. The y-axis displays "Salary ($)" with currency formatting ($0 to $180,000),
+    and the x-axis shows "Department" with angled labels. The title "box-basic · altair
+    · pyplots.ai" appears at the top. A subtle grid is present in the background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable, tick labels well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots well-sized, outliers visible but could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Good color differentiation, colorblind-friendly palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("Salary ($)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but legend is hidden when it could help identify
+          departments
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line, outliers, whiskers all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colors match categories (legend hidden but consistent)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers and different distributions, varied spreads across
+          departments
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary distributions across departments is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Salaries are realistic but some outliers seem extreme (e.g., $170k
+          in Sales seems high relative to base)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses mark_boxplot with customization but doesn't leverage Altair's
+          interactive features or tooltips
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/bokeh.yaml b/plots/box-basic/metadata/bokeh.yaml
index 9d795fb516..be212bfb4c 100644
--- a/plots/box-basic/metadata/bokeh.yaml
+++ b/plots/box-basic/metadata/bokeh.yaml
@@ -23,3 +23,176 @@ review:
   - Does not leverage Bokeh's interactive features (hover tooltips showing statistics
     would be valuable)
   - Axis labels lack units (could be "Test Score (points)" for clarity)
+  image_description: The plot displays a box-and-whisker plot comparing test score
+    distributions across four classes (Class A, B, C, D). Each box shows the interquartile
+    range (IQR) with a black median line. Class A has a blue box (~70-78 range), Class
+    B has a yellow/gold box (~82-88 range) with a tight distribution, Class C has
+    a light blue box (~57-80 range) showing the widest spread, and Class D has a gray
+    box (~72-83 range). Whiskers extend appropriately and outliers are displayed as
+    hollow circles - Class A has one low outlier (~49), Class B has outliers both
+    above (~99) and below (~45-52), Class C has outliers above (~135) and below (~8),
+    and Class D has outliers above (~103-108) and below (~40-42). The title reads
+    "box-basic · bokeh · pyplots.ai" and axes are labeled "Class" (x-axis) and "Test
+    Score" (y-axis). The layout is clean with a dashed horizontal grid.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable. Font
+          sizes are appropriate for 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Boxes, whiskers, and outliers are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color choices (blue, yellow, light blue, gray) that are distinguishable;
+          yellow may have slight contrast issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content, well-centered
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Test Score", "Class") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and appropriate alpha; no legend
+          needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box-and-whisker plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numerical values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median lines, boxes (Q1-Q3), whiskers at 1.5*IQR, outliers as points,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data including outliers visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (categories labeled on x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "box-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: shows different spreads (tight Class B vs wide
+          Class C), outliers on both high and low ends, varying medians'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores across classes is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores 0-140 range is realistic for educational data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implementation manually calculates box plot statistics instead of
+          using Bokeh's built-in capabilities. While functionally correct, it doesn't
+          showcase Bokeh-specific features like hover tools or interactive elements
+          that would distinguish it from other libraries.
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/highcharts.yaml b/plots/box-basic/metadata/highcharts.yaml
index cd1010989c..aa472b64d2 100644
--- a/plots/box-basic/metadata/highcharts.yaml
+++ b/plots/box-basic/metadata/highcharts.yaml
@@ -25,3 +25,175 @@ review:
     Department and Score
   - Legend shows Distribution and Outliers which is functional but takes up space
     without adding much value for this simple plot
+  image_description: 'The plot displays 5 box-and-whisker plots arranged horizontally
+    for Groups A through E. Each box is colored distinctly: blue (Group A), yellow
+    (Group B), purple (Group C), cyan (Group D), and brown (Group E). The title "box-basic
+    · highcharts · pyplots.ai" appears at the top in bold black text. The X-axis is
+    labeled "Category" with group names below each box. The Y-axis is labeled "Value"
+    ranging from 0 to 125. Each box shows quartile ranges with dark median lines.
+    Outliers are displayed as red circular points - visible for Groups A (one low
+    outlier around 24), B (one high outlier around 105), C (two outliers around 19
+    and 76), and E (one high outlier around 115). The distributions vary noticeably:
+    Group D has the highest median (~70), Group C has the lowest (~45), and Group
+    E shows the widest spread. Grid lines are subtle with light gray coloring. The
+    layout is clean with a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels clearly readable, tick labels slightly small
+          but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Boxes well-sized, whiskers and outliers clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, cyan, brown)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (generic "Value")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle and appropriate, legend present but not essential for
+          this plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median lines, quartile boxes, whiskers at 1.5*IQR, outliers as points,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data including outliers (0-125)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows Distribution and Outliers series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows outliers, different medians, varying IQRs, different whisker
+          lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Generic groups rather than real-world scenario (e.g., departments,
+          products)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in reasonable range (0-115), sensible for generic measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves plot.png but HTML also saved (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses BoxPlotSeries and ScatterSeries for outliers, colorByPoint,
+          proper highcharts-more.js integration; could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/letsplot.yaml b/plots/box-basic/metadata/letsplot.yaml
index a653132802..fc6e624747 100644
--- a/plots/box-basic/metadata/letsplot.yaml
+++ b/plots/box-basic/metadata/letsplot.yaml
@@ -22,3 +22,165 @@ review:
   - Appropriate text sizes (24pt title, 20pt labels, 16pt ticks)
   weaknesses:
   - Grid lines could be slightly more visible (currently very subtle with theme_minimal)
+  image_description: 'The plot displays 5 box plots showing salary distributions across
+    departments: Engineering (blue), Marketing (yellow), Sales (red), HR (green),
+    and Finance (purple). Each box clearly shows the median line, IQR (box), and whiskers
+    extending to 1.5*IQR. Outliers are displayed as large black dots - visible above
+    Engineering (~125K, ~137K), Marketing (~107K), Sales (~125K, ~131K), HR (~30K,
+    ~90K), and Finance (~120K, ~122K). The title "box-basic · letsplot · pyplots.ai"
+    appears at the top in appropriate size. X-axis labeled "Department" and Y-axis
+    labeled "Salary ($)" with values ranging from ~20,000 to 140,000. Clean minimal
+    theme with subtle grid lines. Layout is well-balanced with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title ~24pt, axis labels ~20pt, tick labels ~16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box sizes optimal, outliers clearly visible with size=4
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors (blue, yellow, red, green, purple) with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Perfect proportions, no cut-off, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Salary ($)", "Department"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and good, legend correctly hidden (not needed) - but
+          grid could be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box-and-whisker plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line ✓, outliers as points ✓, whiskers at 1.5*IQR ✓, different
+          colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden appropriately (colors explained by x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows outliers, different spreads (Sales widest, HR narrowest), different
+          medians
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic salary values ($20K-$140K range appropriate for US salaries)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/matplotlib.yaml b/plots/box-basic/metadata/matplotlib.yaml
index 2e79331201..6533d6c53d 100644
--- a/plots/box-basic/metadata/matplotlib.yaml
+++ b/plots/box-basic/metadata/matplotlib.yaml
@@ -25,3 +25,176 @@ review:
     use more distinct colors for better differentiation
   - Performance scores exceed 100 for some outliers which is slightly inconsistent
     if interpreting as percentage scores
+  image_description: 'The plot displays a box-and-whisker chart with 4 departments
+    (Engineering, Marketing, Sales, Support) on the x-axis and Performance Score on
+    the y-axis (ranging from ~20 to ~130). Each department has a distinct colored
+    box: Engineering in steel blue (#306998), Marketing in yellow (#FFD43B), Sales
+    in lighter blue (#4B8BBE), and Support in orange (#E8A838). The boxes show median
+    lines, quartile boxes, and whiskers extending to 1.5*IQR. Outliers are visible
+    as gray circular points - Engineering has one low outlier (~43), Marketing has
+    one high outlier (~104), and Sales has three outliers (one very high at ~127,
+    one at ~104, and one very low at ~21). The title "box-basic · matplotlib · pyplots.ai"
+    appears at the top. A subtle horizontal grid with dashed lines aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Boxes well-sized, outliers clearly visible with appropriate marker
+          size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but blue shades (Engineering/Sales) are
+          somewhat similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper use of tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (Performance Score could have units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, y-axis only which is appropriate; no legend
+          needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box-and-whisker plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line, quartile boxes, whiskers at 1.5*IQR, outliers shown,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; category labels serve this purpose
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "box-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers, different distributions (narrow for Marketing, wide
+          for Sales), varying medians; could show more extreme differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department performance scores is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores in 20-130 range; some outliers above 100 are slightly unrealistic
+          for a 0-100 score context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses tick_labels instead of deprecated labels parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses patch_artist for filled boxes and customizes flierprops/medianprops/whiskerprops,
+          but does not use more advanced matplotlib features like violin overlays
+          or swarm points
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/plotly.yaml b/plots/box-basic/metadata/plotly.yaml
index 924b112c7b..d110584c1a 100644
--- a/plots/box-basic/metadata/plotly.yaml
+++ b/plots/box-basic/metadata/plotly.yaml
@@ -26,3 +26,177 @@ review:
   - The $5k outlier in Sales is unrealistically low for annual salary data
   - Does not leverage Plotly-specific features like custom hover templates or quartile
     annotations
+  image_description: 'The plot displays 5 box plots showing salary distributions across
+    departments (Engineering, Marketing, Sales, HR, Finance). Each box uses a different
+    color: Engineering (muted blue), Marketing (yellow), Sales (blue), HR (light yellow),
+    Finance (gray). The title "box-basic · plotly · pyplots.ai" is centered at the
+    top. The Y-axis shows "Annual Salary ($)" with values ranging from $0 to $140,000,
+    formatted with dollar signs and commas. The X-axis shows "Department" with 5 category
+    labels. Each box shows the median line, quartile box, and whiskers. Outliers are
+    visible as individual points - notably two outliers in Sales (one very high ~$147k,
+    one very low ~$5k) and one in Engineering (~$57k). The background is clean white
+    with subtle horizontal gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 20pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots well-sized with clear median lines, whiskers, and visible
+          outlier points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable, though yellow boxes (Marketing, HR) are
+          somewhat similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, adequate margins, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Annual Salary ($)" with unit, X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but no legend present (acceptable since showlegend=False
+          and colors are decorative)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line shown, outliers displayed as points, whiskers present,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range shown including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately (categories labeled on x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "box-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows outliers (especially in Sales), different distributions (Engineering
+          highest, HR lowest), varying spreads (Sales widest, HR narrowest)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a real, comprehensible scenario with plausible
+          relationships
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Mostly realistic salaries, though the $5k outlier in Sales is unrealistically
+          low for annual salary
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic go.Box usage without leveraging Plotly-specific features like
+          hover templates, annotations, or interactive elements that would be visible
+          in the HTML output
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/plotnine.yaml b/plots/box-basic/metadata/plotnine.yaml
index 77d4c10d0e..8a8ef120b0 100644
--- a/plots/box-basic/metadata/plotnine.yaml
+++ b/plots/box-basic/metadata/plotnine.yaml
@@ -24,3 +24,171 @@ review:
   - No subtle grid lines to aid value reading (theme_minimal removes them by default)
   - Y-axis starting at 0 creates unnecessary whitespace since no salaries are near
     zero
+  image_description: 'The plot displays a box-and-whisker chart showing salary distributions
+    across four departments (Engineering, Marketing, Sales, Support). Each department
+    has a distinctly colored box: Engineering (coral/salmon), Marketing (olive green),
+    Sales (cyan/turquoise), and Support (purple/violet). The boxes show median lines,
+    quartiles, and whiskers extending to 1.5×IQR. Outliers are visible as individual
+    gray points above several boxes (Engineering, Marketing, Sales, Support). The
+    title "box-basic · plotnine · pyplots.ai" appears at the top. X-axis labeled "Department"
+    and Y-axis labeled "Salary ($)" with values ranging from 0 to 150000.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots are well-sized, outliers visible with appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but not from a standard colorblind-safe
+          palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, minor whitespace at bottom left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("Salary ($)", "Department")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No visible grid, legend hidden (acceptable since categories on x-axis)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box-and-whisker plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median lines, quartiles, whiskers at 1.5×IQR, outliers as points,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden (categories shown on x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "box-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions, outliers present, varying spreads;
+          could show more outlier variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Salary values are realistic; y-axis starting at 0 creates some empty
+          space
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: verbose=False is a non-standard parameter
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with geom_boxplot, theme_minimal, element_text
+          customization; could leverage scale_fill_brewer for better palettes
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/pygal.yaml b/plots/box-basic/metadata/pygal.yaml
index 8bc8379672..977bfd040c 100644
--- a/plots/box-basic/metadata/pygal.yaml
+++ b/plots/box-basic/metadata/pygal.yaml
@@ -22,3 +22,171 @@ review:
   - Legend box size is small relative to the large chart dimensions, making it less
     prominent
   - Outlier markers could be slightly larger for better visibility at this resolution
+  image_description: 'The plot displays a box plot (box-and-whisker) showing salary
+    distributions across 5 departments: Engineering (blue), Marketing (yellow), Sales
+    (green), Operations (orange), and HR (purple). Each department has a distinct
+    colored box showing the interquartile range with a median line. Whiskers extend
+    to show the data range, and outliers are displayed as small dots outside the whiskers.
+    The title "box-basic · pygal · pyplots.ai" appears at the top. The y-axis shows
+    "Salary ($)" ranging from 0 to ~135,000, and the x-axis label "Department" appears
+    at the bottom. A legend at the bottom identifies each department color. The background
+    is white with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, though tick labels could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Boxes are clearly visible, outlier dots are small but distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses distinct colors that are colorblind-safe (blue, yellow, green,
+          orange, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with legend at bottom, appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Salary ($)" with units, X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle but legend box sizes appear small relative to the
+          chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median, quartiles, whiskers, and outliers as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 departments
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "box-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions, varying spreads, and outliers in Engineering
+          and Sales
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a realistic, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary values ($40k-$135k) are realistic for department salaries
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses box_mode="tukey" which is good, but could leverage more pygal-specific
+          styling or interactivity features
+  verdict: APPROVED
diff --git a/plots/box-basic/metadata/seaborn.yaml b/plots/box-basic/metadata/seaborn.yaml
index 7b6c0e1ffe..c0bcec957a 100644
--- a/plots/box-basic/metadata/seaborn.yaml
+++ b/plots/box-basic/metadata/seaborn.yaml
@@ -23,3 +23,171 @@ review:
   - Does not leverage seaborn distinctive features like stripplot overlay, violin
     plots, or statistical annotations
   - Yellow color for Marketing category could have slightly better contrast
+  image_description: 'The plot displays a box-and-whisker chart showing salary distributions
+    across 5 departments (Engineering, Marketing, Sales, HR, Finance). Each department
+    has a distinctly colored box: Engineering (blue), Marketing (yellow/gold), Sales
+    (green), HR (orange), and Finance (purple). The y-axis shows "Salary ($)" ranging
+    from approximately $20K to $160K with currency formatting. The x-axis shows "Department"
+    labels. Each box clearly displays the median line, quartile boundaries, whiskers
+    extending to 1.5*IQR, and outliers shown as hollow circles. The title correctly
+    reads "box-basic · seaborn · pyplots.ai". A subtle dashed horizontal grid helps
+    with value reading. The layout is clean with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Boxes well-sized (width=0.6), outliers clearly visible (fliersize=10)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Distinct colors for each category, though yellow may be slightly
+          harder to see
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper use of tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Salary ($)" with unit indicator, "Department" descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3), but y-axis only; no legend needed as colors
+          are self-explanatory
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line, quartile box, whiskers at 1.5*IQR, outliers as points,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; category colors match x-axis labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "box-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions (Engineering high/tight, Sales wide
+          spread, HR lower/tight), multiple outliers visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary ranges $40K-$160K are realistic for US corporate salaries
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, pandas, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses hue with palette to avoid seaborn 0.14+ warning
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.boxplot which is basic; could have used violin overlay,
+          swarmplot, or statistical annotations
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/altair.yaml b/plots/box-grouped/metadata/altair.yaml
index 3ae037c02d..7477358945 100644
--- a/plots/box-grouped/metadata/altair.yaml
+++ b/plots/box-grouped/metadata/altair.yaml
@@ -25,3 +25,178 @@ review:
     plot area
   - Grid styling uses gridDash which works but default subtle grid might be cleaner
   - Could add tooltips for interactivity since Altair excels at this
+  image_description: 'The plot displays a grouped box plot comparing employee performance
+    scores across four departments (Engineering, Marketing, Sales, Support). Within
+    each department, three side-by-side box plots represent experience levels: Junior
+    (deep blue #306998), Mid (yellow #FFD43B), and Senior (teal #4ECDC4). The y-axis
+    shows "Performance Score (%)" ranging from 0 to 105. Each box plot shows the median
+    (white horizontal line), interquartile range (box), whiskers extending to ~1.5×IQR,
+    and outliers as hollow circles. A legend on the right identifies the experience
+    levels. The title "box-grouped · altair · pyplots.ai" is centered at the top.
+    The plot demonstrates clear progression where Senior employees consistently score
+    higher than Mid, who score higher than Junior across all departments.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots well-sized (size=60), outliers visible (size=80), slight
+          deduction as some boxes could be slightly wider for better distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/teal palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Performance Score (%)" includes units, "Department" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), but legend is positioned far
+          from the data in isolated space on the right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Department on x-axis, Performance Score on y-axis, Experience Level
+          for grouping
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side boxes, distinct colors, legend present, median lines,
+          whiskers, outliers all shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-105, capturing all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior, Mid, Senior
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "box-grouped · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied distributions, different medians, spreads, and outliers;
+          slight deduction as outliers appear in limited positions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department and experience is a real,
+          comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores 0-100 are realistic; minor deduction as some distributions
+          seem slightly compressed at the top (clipping at 100)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also saves plot.html (minor, but only png was requested
+          for this review)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding with xOffset for grouping, mark_boxplot
+          with customization (median stroke, outlier size), but could leverage more
+          interactive features or tooltips
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/bokeh.yaml b/plots/box-grouped/metadata/bokeh.yaml
index c37755012b..0905747eca 100644
--- a/plots/box-grouped/metadata/bokeh.yaml
+++ b/plots/box-grouped/metadata/bokeh.yaml
@@ -28,3 +28,176 @@ review:
   - One outlier value (105) slightly exceeds typical 100-point performance scale
   - Could leverage more Bokeh-specific interactive features (HoverTool would enhance
     the plot)
+  image_description: 'The plot displays a grouped box plot comparing employee performance
+    scores across 4 departments (Sales, Engineering, Marketing, Support) with 3 experience
+    levels (Junior in blue #306998, Senior in yellow #FFD43B, Lead in teal #4ECDC4).
+    Each department shows 3 side-by-side box plots with clear median lines, IQR boxes,
+    whiskers extending to 1.5×IQR, and outliers displayed as colored circles. The
+    title "box-grouped · bokeh · pyplots.ai" is centered at the top. A legend on the
+    right side clearly identifies the subcategories. The grid uses subtle dashed lines
+    with low alpha. Notable outliers appear in Engineering Lead (~38, ~58) and Support
+    Lead (~65-69). The y-axis ranges from 30-110 showing Performance Score, and all
+    text is clearly readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, boxes well-spaced within groups
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths and outlier markers appropriately sized for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Teal palette is colorblind-friendly, though contrast
+          between yellow boxes and white background could be slightly better
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Department", "Performance Score") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid with alpha 0.3, legend well-placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, subcategories as groups
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median, quartiles, whiskers at 1.5×IQR, outliers, distinct
+          colors, legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 30-110 shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior/Senior/Lead
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "box-grouped · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows outliers, different medians, varying spreads across groups
+          - minor: could show more dramatic distribution differences'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department and experience level is
+          a real, comprehensible HR scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores 40-100 are realistic; the 105 outlier slightly
+          exceeds typical 100-point scales
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has a helper function `calc_boxplot_stats` which deviates from pure
+          KISS, but it's reasonable for box plot calculations
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, figure with proper categorical x_range, vbar
+          for boxes, segment for whiskers. Good usage but could leverage more Bokeh-specific
+          features like HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/highcharts.yaml b/plots/box-grouped/metadata/highcharts.yaml
index 9543cd5012..e84a16f489 100644
--- a/plots/box-grouped/metadata/highcharts.yaml
+++ b/plots/box-grouped/metadata/highcharts.yaml
@@ -23,3 +23,180 @@ review:
     (no functions/classes rule)
   - Outliers are not displayed as separate points (spec requests outliers to be shown)
   - Legend could be positioned closer to the chart area
+  image_description: 'The plot displays a grouped box plot visualization with 4 department
+    categories (Engineering, Sales, Marketing, Finance) on the x-axis and Performance
+    Score (range 20-100) on the y-axis. Each department contains 3 side-by-side box
+    plots colored distinctly: blue (Junior), yellow (Mid-Level), and purple (Senior).
+    The title correctly reads "box-grouped · highcharts · pyplots.ai" in bold black
+    text with a gray subtitle "Employee Performance Scores by Department and Experience
+    Level". A vertical legend in the top-right identifies the three experience levels.
+    All boxes show clear median lines (black), quartile boxes with fill colors matching
+    the legend, and whiskers extending to data bounds. The distributions vary meaningfully
+    across groups - e.g., Sales Junior has a wide spread down to ~36, while Finance
+    Senior reaches up to 100.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable.
+          Font sizes are appropriate for 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Category labels are well-spaced, legend
+          doesn't overlap data.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots are clearly visible with good line widths. Boxes could
+          be slightly wider for optimal visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, purple palette is colorblind-safe. No red-green conflicts.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space. Slight excess whitespace at bottom due
+          to y-axis starting at 20.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Department" and "Performance Score" are descriptive labels.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle. Legend placement is good but could be closer to the
+          chart.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, subcategories as grouped
+          series
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side boxes, distinct colors, legend, median/quartile/whiskers
+          shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 20-100 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior, Mid-Level, Senior
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-grouped · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied distributions, different medians, spread differences.
+          Missing explicit outliers beyond whiskers.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance by department/experience is a highly realistic
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores 20-100 are reasonable. Some extreme values at
+          boundaries (100) are slightly artificial.
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains a helper function `calc_boxplot_stats()` which violates
+          KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Properly uses BoxPlotSeries, Highcharts styling options, inline JS
+          embedding for Selenium rendering
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/letsplot.yaml b/plots/box-grouped/metadata/letsplot.yaml
index c0673e2cf2..5c1b382d23 100644
--- a/plots/box-grouped/metadata/letsplot.yaml
+++ b/plots/box-grouped/metadata/letsplot.yaml
@@ -28,3 +28,175 @@ review:
     - minor label inconsistency
   - Does not leverage lets-plot interactive features in the HTML export (tooltips
     would enhance the visualization)
+  image_description: 'The plot displays a grouped box plot with 4 departments (Engineering,
+    Marketing, Sales, Operations) on the x-axis and Performance Score (%) on the y-axis
+    ranging from approximately 20 to 110. Within each department, there are 3 side-by-side
+    box plots representing experience levels: Junior (blue #306998), Mid-Level (yellow
+    #FFD43B), and Senior (green #4CAF50). The boxes show clear median lines, quartile
+    boxes, whiskers, and outliers displayed as gray circles. The title reads "box-grouped
+    · letsplot · pyplots.ai" in bold at the top. A legend on the right identifies
+    the three experience levels. The plot uses a minimal theme with a light gray background
+    and subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (28pt), axis labels are 22pt, tick labels
+          are 18pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clear and distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots are well-sized with appropriate alpha (0.85), outliers
+          are visible with size=3
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable (blue, yellow, green) but blue-green could
+          be slightly problematic for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot is well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Performance Score (%)" with units, X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present but legend title says "Experience Level" while legend
+          fill label in code says "fill" - minor inconsistency
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot with side-by-side boxes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, subcategories as fill color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median, quartiles, whiskers, and outliers as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior, Mid-Level, Senior
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "box-grouped · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows outliers, different distributions, varying medians and spreads
+          across groups. Minor deduction: some distributions very similar'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department and experience level is
+          a realistic HR scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores 20-110% - scores over 100% are slightly unusual
+          for a percentage metric
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but path="." is unusual (should work but non-standard)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses lets-plot grammar of graphics appropriately with theme_minimal,
+          scale_fill_manual, but doesn't leverage unique lets-plot features like interactive
+          tooltips in HTML output
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/matplotlib.yaml b/plots/box-grouped/metadata/matplotlib.yaml
index 1417f2818d..3434943416 100644
--- a/plots/box-grouped/metadata/matplotlib.yaml
+++ b/plots/box-grouped/metadata/matplotlib.yaml
@@ -26,3 +26,172 @@ review:
   - Data pattern is quite uniform across departments (all show same Junior<Mid<Senior
     progression) - more inter-department variation would be more interesting
   - Could use matplotlib-specific features like notched boxes to show confidence intervals
+  image_description: 'The plot displays a grouped box plot showing employee performance
+    scores (0-100) across 4 departments (Sales, Engineering, Marketing, Support).
+    Each department has 3 side-by-side box plots representing experience levels: Junior
+    (dark blue #306998), Mid-Level (yellow #FFD43B), and Senior (teal #4ECDC4). The
+    boxes clearly show medians (dark horizontal lines), interquartile ranges, whiskers
+    extending to 1.5*IQR, and several outliers (visible as circles, e.g., around 20
+    in Engineering Junior, and in the 40 range for Support Mid-Level). A legend in
+    the upper left identifies the three subcategories. The title "box-grouped · matplotlib
+    · pyplots.ai" is displayed at the top. Y-axis shows "Performance Score (0-100)"
+    and X-axis shows "Department". A subtle horizontal grid aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16-18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, boxes well-spaced within groups
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Boxes appropriately sized, outliers visible with good marker size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/teal palette is colorblind-safe (no red-green issues)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(0-100)", X-axis descriptive "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend could be positioned better
+          (upper right would avoid any potential overlap with data)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, subcategories as grouped boxes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side boxes, distinct colors, legend, median/quartiles/whiskers/outliers
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-110 shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior/Mid-Level/Senior
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "box-grouped · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions (Junior wider spread, Senior narrower),
+          varying medians by experience level, outliers present in multiple groups
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Employee performance by department and experience is plausible, though
+          pattern is somewhat uniform across departments
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 0-100 are sensible, distributions realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Standard boxplot with patch_artist for coloring, but could use additional
+          matplotlib features like notched boxes or custom styling
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/plotly.yaml b/plots/box-grouped/metadata/plotly.yaml
index 72e6939c8c..3a10b23a71 100644
--- a/plots/box-grouped/metadata/plotly.yaml
+++ b/plots/box-grouped/metadata/plotly.yaml
@@ -27,3 +27,176 @@ review:
   - Does not leverage Plotly interactive features like custom hover templates showing
     exact statistics
   - Some outliers exceed 100, which is unusual for a 0-100 performance score scale
+  image_description: 'The plot displays a grouped box plot showing employee performance
+    scores (y-axis, ranging from ~30 to ~110) across four departments (Sales, Engineering,
+    Marketing, Support) on the x-axis. Each department contains three side-by-side
+    box plots representing experience levels: Junior (steel blue #306998), Mid-Level
+    (yellow #FFD43B), and Senior (teal #4ECDC4). The boxes show clear median lines,
+    quartile ranges, and whiskers. Outliers are visible as individual points (e.g.,
+    low outlier at ~30 for Sales Junior, high outlier at ~112 for Engineering Mid-Level).
+    The legend titled "Experience Level" is positioned to the right. The title correctly
+    reads "box-grouped · plotly · pyplots.ai" centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 20pt, legend at
+          18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots well-sized with clear median lines, whiskers, and visible
+          outliers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, teal palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though right margin for legend is generous
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Department", "Performance Score") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha 0.1, legend well-placed but title could be more
+          prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, subcategories as grouped
+          boxes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side boxes, distinct colors, legend, median/quartiles/whiskers/outliers
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior, Mid-Level, Senior
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-grouped · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers, varying distributions, different medians across groups;
+          could show more extreme distribution differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance by department and experience level is a realistic
+          HR analytics scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores 30-100+ are reasonable; the 112 outlier slightly
+          exceeds typical 0-100 scale
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses go.Box with boxmode="group" correctly; could leverage more Plotly
+          features like hover templates or animation
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/plotnine.yaml b/plots/box-grouped/metadata/plotnine.yaml
index fff4c48e53..c09eba0400 100644
--- a/plots/box-grouped/metadata/plotnine.yaml
+++ b/plots/box-grouped/metadata/plotnine.yaml
@@ -24,3 +24,174 @@ review:
   - The two blue colors (Junior and Senior) are somewhat similar; a more distinct
     palette would improve differentiation
   - Y-axis label could include units (e.g., Performance Score (0-100))
+  image_description: 'The plot displays a grouped box plot showing employee performance
+    scores across four departments (Engineering, Marketing, Sales, Support) on the
+    x-axis, with Performance Score on the y-axis ranging from approximately 30 to
+    120. Three experience levels (Junior, Mid-Level, Senior) are shown side-by-side
+    within each department using distinct colors: dark blue (#306998) for Junior,
+    yellow (#FFD43B) for Mid-Level, and light blue (#4B8BBE) for Senior. The boxes
+    show medians, quartiles, and whiskers with outliers displayed as gray dots. The
+    title "box-grouped · plotnine · pyplots.ai" appears at the top. The legend on
+    the right identifies "Experience Level" with the three categories. The plot uses
+    a minimal theme with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; department labels and boxes are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Boxes are well-sized and visible; outliers clearly marked with good
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/blue palette provides good contrast; the two blues are
+          distinguishable but could be more distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Department", "Performance Score") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid, well-placed legend with clear title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, subcategories as fill
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side boxes, legend, consistent widths, median/quartiles/whiskers/outliers
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies experience levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "box-grouped · plotnine · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions across departments and experience levels,
+          includes outliers, varying spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department and experience level is
+          a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores in 40-100 range are realistic for workplace metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Proper ggplot2 grammar with aes(), geom_boxplot(), position_dodge2(),
+          scale_fill_manual(), theme_minimal(), and detailed theme() customization
+  verdict: APPROVED
diff --git a/plots/box-grouped/metadata/seaborn.yaml b/plots/box-grouped/metadata/seaborn.yaml
index a36ff1a265..4b2ad06cce 100644
--- a/plots/box-grouped/metadata/seaborn.yaml
+++ b/plots/box-grouped/metadata/seaborn.yaml
@@ -24,3 +24,175 @@ review:
     and could be more distinct for colorblind accessibility'
   - Axis labels lack units (though Performance Score is inherently unitless, could
     add 0-100 for clarity)
+  image_description: 'The plot displays a grouped box plot with 4 departments (Engineering,
+    Marketing, Sales, Support) on the x-axis and Performance Score (20-100) on the
+    y-axis. Each department has 3 side-by-side boxes representing experience levels:
+    Junior (dark blue #306998), Mid-Level (gold/yellow #FFD43B), and Senior (light
+    blue #4B8BBE). The boxes show clear median lines, quartile boundaries, whiskers
+    extending to ~1.5×IQR, and several outliers displayed as small circles. The title
+    "box-grouped · seaborn · pyplots.ai" is prominently displayed at the top. A legend
+    in the upper right identifies the three experience levels. The plot uses a white
+    background with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths well-proportioned, outliers visible with fliersize=8
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast between colors, though blue/blue-gray Senior vs Junior
+          could be slightly more distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Department", "Performance Score") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3, legend well-placed but title "Experience
+          Level" slightly differs from data column "Experience"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct grouped box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, subcategories as hue
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Side-by-side boxes, distinct colors, legend, consistent widths, median/quartiles/whiskers/outliers
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range 15-105 shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Junior, Mid-Level, Senior
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "box-grouped · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying distributions (different medians, spreads), multiple
+          outliers visible, demonstrates the progression from Junior to Senior across
+          departments
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department and experience level is
+          a realistic HR analytics scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 20-100 are sensible, distributions show realistic
+          patterns (seniors generally higher)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Leverages seaborn's sns.boxplot with hue grouping, set_style for
+          theming, custom palette integration
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/altair.yaml b/plots/box-horizontal/metadata/altair.yaml
index 883d42ad8e..66c0faf2d6 100644
--- a/plots/box-horizontal/metadata/altair.yaml
+++ b/plots/box-horizontal/metadata/altair.yaml
@@ -25,3 +25,174 @@ review:
   - Could leverage Altair interactive() method for tooltip exploration in the HTML
     output
   - Slight excess whitespace on right side due to outlier-extended scale
+  image_description: The plot displays a horizontal box plot showing response time
+    distributions for 6 different service types. The boxes are rendered in a blue
+    color (#306998) with yellow median lines. Services are displayed on the y-axis
+    (Cache Lookup, Authentication, API Gateway, Database Query, Email Service, File
+    Storage) sorted by median value from fastest to slowest. The x-axis shows "Response
+    Time (ms)" ranging from 0 to ~500ms. Each service shows a box representing the
+    IQR, whiskers extending to the data range, and circular outlier points. The title
+    "box-horizontal · altair · pyplots.ai" appears at the top center. The layout is
+    clean with no overlapping elements, and outliers are clearly visible for each
+    service category.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; service names are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots are clearly visible; outliers are appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe; yellow median provides
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight excess whitespace on right due to
+          outlier scaling
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" includes units; "Service Type" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid present (acceptable for box plots); no legend needed for
+          single-variable encoding
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot orientation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, numeric values on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median lines, quartile boxes, whiskers, and outliers all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data including outliers fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; encoding is clear
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses `box-horizontal · altair · pyplots.ai` format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers, different distributions, varying spreads; categories
+          sorted by median as recommended
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times by service type is a realistic, neutral business/tech
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for API response times (15-500ms range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_boxplot with customization, proper encoding types, and
+          HTML export; could use interactive() for tooltip exploration
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/bokeh.yaml b/plots/box-horizontal/metadata/bokeh.yaml
index cf45e60fc7..600f44d9c5 100644
--- a/plots/box-horizontal/metadata/bokeh.yaml
+++ b/plots/box-horizontal/metadata/bokeh.yaml
@@ -24,3 +24,172 @@ review:
     data
   - Median line implementation uses a narrow hbar with fixed width which may not scale
     well with different data ranges
+  image_description: 'The plot displays a horizontal box plot showing response times
+    (ms) for 5 different service types: Cache Layer, API Gateway, Authentication,
+    Database Query, and File Storage. The boxes are rendered in a steel blue color
+    (#306998) with bright yellow median lines. Categories are listed on the y-axis
+    (Service Type) and response times on the x-axis (0-300+ ms). Each box shows the
+    IQR with whiskers extending to the data range, and outliers are displayed as hollow
+    circles with blue outlines. The plot has a light gray background (#fafafa) with
+    subtle dashed grid lines. Categories are sorted by median response time from fastest
+    (Cache Layer ~15ms) to slowest (File Storage ~200ms). The title "box-horizontal
+    · bokeh · pyplots.ai" appears in the top-left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable. Font sizes appropriate
+          for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category names are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Boxes and whiskers clearly visible, outliers well-sized. Minor:
+          whisker caps could be slightly more prominent'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though some empty space on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" and "Service Type" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with good alpha, no legend needed but grid could be
+          slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, numeric values on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line, quartile boxes, whiskers, outliers all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color boxes
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "box-horizontal · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions, varying spreads, outliers in multiple
+          categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Service response times is a realistic, neutral tech scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times (15-300ms) are realistic for the described services
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: ColumnDataSource imported via figure but not explicitly used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Minor: could use more modern bokeh patterns'
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses bokeh's figure, hbar, scatter methods correctly. Could leverage
+          ColumnDataSource and HoverTool for more bokeh-idiomatic code
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/highcharts.yaml b/plots/box-horizontal/metadata/highcharts.yaml
index d325219162..e21a19e3be 100644
--- a/plots/box-horizontal/metadata/highcharts.yaml
+++ b/plots/box-horizontal/metadata/highcharts.yaml
@@ -23,3 +23,173 @@ review:
   - Data does not include explicit outlier points which would better demonstrate box
     plot capabilities
   - Vertical spacing between box plots could be tighter to reduce whitespace
+  image_description: 'The plot displays a horizontal box plot showing "Response Time
+    Distribution by Service Type". The title "box-horizontal · highcharts · pyplots.ai"
+    appears at the top in bold black text, with a gray subtitle below. Five service
+    categories are shown on the y-axis: API Gateway, Database Query, File Upload,
+    Authentication, and Payment Processing. The x-axis shows "Response Time (ms)"
+    ranging from 0 to 490. Each box plot is rendered with a blue fill color (Python
+    blue #306998) with transparency, blue outlines for boxes and whiskers, and yellow/gold
+    median lines. The boxes show different distributions - Authentication has the
+    smallest/fastest times, while File Upload has the largest spread extending to
+    ~490ms. A subtle gray grid helps with value reading. The layout is clean with
+    good spacing between elements.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels on y-axis are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots are well-sized with good pointWidth, whiskers clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Python blue palette with yellow median; colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some excessive whitespace between categories
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Service Type" and "Response Time (ms)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle, legend disabled (appropriate for single
+          series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot using inverted chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median, quartiles, whiskers (1.5*IQR)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately for single-series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-horizontal · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows different distributions and spreads, but no explicit outliers
+          shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times by service type is an excellent, neutral, real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for API response times (5-490ms)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts inverted chart, BoxPlotSeries, proper styling options,
+          but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/letsplot.yaml b/plots/box-horizontal/metadata/letsplot.yaml
index c3a0f151d5..2fe0fb4959 100644
--- a/plots/box-horizontal/metadata/letsplot.yaml
+++ b/plots/box-horizontal/metadata/letsplot.yaml
@@ -24,3 +24,176 @@ review:
   - Does not leverage lets-plot distinctive features like interactive tooltips or
     hover information
   - Minor empty space on right side of plot due to outlier distribution
+  image_description: 'The plot displays a horizontal box plot showing response times
+    (in milliseconds) for 6 different service types: API Gateway, Database Query,
+    Authentication, File Storage, Cache Lookup, and Email Service. The boxes are rendered
+    in a muted blue color (#306998) with darker outlines. Outliers are displayed as
+    yellow/gold circles. The x-axis shows "Response Time (ms)" ranging from 0 to ~750ms,
+    while the y-axis shows "Service Type" with category labels. The title "box-horizontal
+    · letsplot · pyplots.ai" appears at the top. The plot uses a minimal theme with
+    subtle vertical grid lines and no horizontal grid lines. Each service shows distinct
+    distributions - Cache Lookup has the tightest distribution near 0-50ms, while
+    Database Query and File Storage show wider distributions with outliers extending
+    to 600-750ms.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; service names on y-axis are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots are well-sized, outliers clearly visible with contrasting
+          yellow color
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue boxes with yellow outliers provide good contrast; colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, minor: some empty space on right side
+          due to outliers'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "Response Time (ms)", Y-axis is descriptive "Service
+          Type"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Vertical grid lines are present but quite prominent; no legend needed
+          for this plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, numeric values on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median, quartiles, whiskers, and outliers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; categories are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "box-horizontal · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: different medians, spreads, and outlier patterns
+          across services'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times by service type is a realistic, neutral tech scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times in ms are realistic (15ms for cache to 300ms for file
+          storage)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic ggplot grammar usage without lets-plot specific features like
+          tooltips or interactivity configuration
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/matplotlib.yaml b/plots/box-horizontal/metadata/matplotlib.yaml
index 4bfc56aab2..a2a1be9a92 100644
--- a/plots/box-horizontal/metadata/matplotlib.yaml
+++ b/plots/box-horizontal/metadata/matplotlib.yaml
@@ -23,3 +23,178 @@ review:
   weaknesses:
   - Grid only on x-axis; adding subtle y-axis grid lines could help visually align
     categories across the plot
+  image_description: 'The plot displays a horizontal box plot showing response times
+    (in milliseconds) for 6 different service types. The boxes are rendered in a muted
+    blue color (#306998) with yellow/gold median lines (#FFD43B). The y-axis shows
+    service categories: Authentication Service, Database Query, File Upload, Payment
+    Processing, Email Notification, and Image Processing. The x-axis shows Response
+    Time (ms) ranging from approximately 0 to 900+ms. Outliers are displayed as yellow
+    circles with blue edges. The plot has a subtle dashed grid on the x-axis. The
+    title "box-horizontal · matplotlib · pyplots.ai" appears at the top. Each service
+    shows different distribution characteristics - Email Notification has a tight
+    distribution around 80ms, while Image Processing has a wide uniform spread from
+    ~300-800ms. Payment Processing shows right-skewed data with multiple outliers
+    extending beyond 800ms.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels fit well on y-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths appropriate, outliers clearly visible with good marker
+          size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Response Time (ms)",
+          "Service Type"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate (alpha=0.3), but x-axis only grid
+          could benefit from y-axis grid lines for alignment
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot with boxes oriented horizontally
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, numeric values on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median line, quartile box, whiskers, and outliers as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes accommodate outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this single-series box plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-horizontal · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: tight distributions (Email), wide spreads (Image
+          Processing), outliers (Database Query, Payment Processing), skewed data
+          (Payment Processing)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Service response times is a realistic, neutral tech scenario perfectly
+          suited for comparing distributions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Response times in ms are realistic: 80ms for email, 150ms for auth,
+          500ms for file upload'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses tick_labels (correct) instead of deprecated labels parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of patch_artist for filled boxes, comprehensive props customization
+          (flierprops, medianprops, whiskerprops, capprops, boxprops)
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/plotly.yaml b/plots/box-horizontal/metadata/plotly.yaml
index d090492acb..a4a5ccec7c 100644
--- a/plots/box-horizontal/metadata/plotly.yaml
+++ b/plots/box-horizontal/metadata/plotly.yaml
@@ -25,3 +25,175 @@ review:
     readability
   - Could add hover template customization to show more statistics on hover (plotly
     strength)
+  image_description: The plot displays a horizontal box plot showing response times
+    (in milliseconds) for 6 different service types. The services are arranged vertically
+    on the y-axis (Cache Lookup, Authentication, Message Queue, API Gateway, Database
+    Query, File Storage) sorted by median response time from fastest to slowest. The
+    x-axis shows Response Time (ms) ranging from 0 to 500. Boxes alternate between
+    Python blue (#306998) and Python yellow (#FFD43B) colors. Each box shows the quartile
+    distribution with whiskers extending to show data range, and outlier points are
+    visible for most services. The title "box-horizontal · plotly · pyplots.ai" is
+    centered at the top. The background is clean white with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, service names well-spaced on y-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots clearly visible, outliers properly sized at 8px, opacity
+          0.7 works well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow alternating is colorblind-safe, though contrast could
+          be slightly better
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, left margin of 180px accommodates service names
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" includes units, "Service Type" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (0.1 alpha), but no legend needed here; however grid
+          could be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot with boxes oriented horizontally
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, numeric values on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows median line, quartile box, whiskers, and outliers as required
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, categories clearly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "box-horizontal · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: different spreads, medians, and outlier patterns
+          across services'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Response times by service type is a realistic scenario, values are
+          plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times range from ~15ms (cache) to ~500ms (file storage)
+          - realistic for services
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png AND plot.html (correct for plotly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Box with orientation="h", proper plotly_white template, but
+          could leverage more interactive features or hover customization
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/plotnine.yaml b/plots/box-horizontal/metadata/plotnine.yaml
index 6d312f898b..a76c97c61b 100644
--- a/plots/box-horizontal/metadata/plotnine.yaml
+++ b/plots/box-horizontal/metadata/plotnine.yaml
@@ -25,3 +25,173 @@ review:
     which is incorrect'
   - Y-axis label is empty string instead of descriptive label like Service Type
   - Could use more distinctive plotnine features like faceting or statistical transformations
+  image_description: 'The plot displays a horizontal box plot showing response times
+    (ms) for 5 service types. Services are arranged vertically on the y-axis (sorted
+    by median): Database Query Handler (bottom, blue), Authentication Service (yellow),
+    Email Notification (light blue), Payment Gateway (gray), and File Storage API
+    (top, light yellow). The x-axis shows Response Time (ms) ranging from 0 to 400.
+    Each box shows the IQR with median line, whiskers extend to 1.5*IQR, and outliers
+    are visible as dots for Database Query Handler (around 220-280ms) and Payment
+    Gateway (around 350-380ms). The title "box-horizontal · plotnine · pyplots.ai"
+    is displayed in bold at the top. The background uses a minimal theme with subtle
+    grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~24pt), axis labels are clearly readable
+          (~20pt), tick labels are appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; service names are well-spaced on the y-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots are appropriately sized, outliers are visible with good
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Python-themed palette is distinguishable but could have better contrast
+          between some colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills appropriate area with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has units "Response Time (ms)" but Y-axis is empty (no label)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but there's an issue with panel_grid_major_y
+          using element_text instead of element_line
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot using coord_flip()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, numeric values on X-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All box plot elements present: median, quartiles, whiskers, outliers'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly hidden since colors map to y-axis categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-horizontal · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions, varying spreads, and outliers for
+          multiple services
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Service response times is a realistic, neutral tech scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times 50-400ms are realistic for API services
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic ggplot grammar but no distinctive plotnine features beyond
+          standard geom_boxplot + coord_flip
+  verdict: APPROVED
diff --git a/plots/box-horizontal/metadata/seaborn.yaml b/plots/box-horizontal/metadata/seaborn.yaml
index df1501eee4..ac1db25b97 100644
--- a/plots/box-horizontal/metadata/seaborn.yaml
+++ b/plots/box-horizontal/metadata/seaborn.yaml
@@ -23,3 +23,175 @@ review:
   - Color alternation (blue/yellow) is decorative rather than conveying meaning -
     a single color or sequential palette might be cleaner
   - Could use sns.set_context() for more consistent seaborn styling
+  image_description: 'The plot displays a horizontal box plot showing response times
+    (in milliseconds) for 5 different service types. The boxes are oriented horizontally
+    with categories on the y-axis (Cache Lookup, API Gateway, Authentication, Database
+    Query, File Upload - sorted by median value) and response time on the x-axis (0-1100ms).
+    Colors alternate between blue (#306998) and yellow (#FFD43B). Each box shows the
+    median line, quartile box, whiskers extending to ~1.5×IQR, and outliers as hollow
+    circles. The title "box-horizontal · seaborn · pyplots.ai" is displayed at the
+    top. Grid lines are subtle dashed vertical lines. The plot demonstrates varying
+    distributions: Cache Lookup is very tight and fast (~25ms), while File Upload
+    has a wide spread (400-600ms) with outliers near 1000ms.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths, whiskers, and outlier markers appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is good and colorblind-friendly, though alternating
+          pattern is decorative rather than meaningful
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" and "Service Type" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend needed/present - appropriate
+          for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, numeric values on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Median line, quartile box, whiskers, outliers all present; categories
+          sorted by median
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-variable box plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "box-horizontal · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows outliers, different distributions (tight vs wide), varying
+          medians, different spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Service response times is plausible, but values somewhat simplified
+          (e.g., File Upload always being slowest is expected but could be more nuanced)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times in ms are realistic for web services
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: pandas imported and used, though data could be structured without
+          it
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's boxplot with proper hue/palette handling, order parameter,
+          and flierprops customization. Could leverage more seaborn-specific features
+          like style context.
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/altair.yaml b/plots/box-notched/metadata/altair.yaml
index 0d06280e4f..ac6dd480f0 100644
--- a/plots/box-notched/metadata/altair.yaml
+++ b/plots/box-notched/metadata/altair.yaml
@@ -24,3 +24,183 @@ review:
   - Sales data contains a value (~110) that exceeds typical 100-point performance
     score scale
   - Y-axis extends beyond necessary range (0-120 when data maxes at ~110)
+  image_description: 'The plot displays four notched box plots representing employee
+    performance scores across four departments: Engineering (Python blue #306998),
+    Marketing (yellow #FFD43B), Operations (orange/red #E85C41), and Sales (light
+    blue #4B8BBE). Each box shows the characteristic notched shape around the median,
+    with white median lines visible inside the narrowed notch sections. Whiskers extend
+    from Q1/Q3 to the furthest non-outlier values with horizontal caps. Outliers are
+    displayed as colored circular points matching their department colors - Engineering
+    has one low outlier (~57), Marketing has outliers at both extremes (~35 and ~97-98),
+    Operations has one low outlier (~30), and Sales has one high outlier (~110). The
+    title "box-notched · altair · pyplots.ai" is centered at the top. The Y-axis is
+    labeled "Performance Score" (range 0-120) and the X-axis is labeled "Department".
+    A subtle gray grid with low opacity aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at large
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, department labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Boxes, notches, whiskers, and outliers all clearly visible; slight
+          deduction as some outlier points could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, orange, light blue palette is colorblind-safe with
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though y-axis extends to 120 when data
+          maxes around 110
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (Performance Score could have context
+          like "points" or "%" but acceptable for this metric)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend but colors differentiate
+          categories adequately
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category on X-axis, numeric values on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches at 95% CI, median, quartiles, whiskers at 1.5×IQR, outliers
+          shown, different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; colors self-explanatory with axis labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-notched · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows outliers (multiple departments), different medians, varying
+          spreads, non-overlapping notches (Engineering vs Operations)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Employee performance scores across departments are plausible; slight
+          deduction as Sales has a value >100, which is unusual for a score typically
+          capped at 100
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Most values realistic (50-100 range); Sales outlier at ~110 exceeds
+          typical 100-point scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Linear flow but uses loops for data preparation; acceptable given
+          Altair lacks native notched box support
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered marks and declarative encoding, but since Altair
+          lacks native notched box plots, the implementation manually calculates statistics
+          rather than leveraging Altair's built-in statistical transforms
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/bokeh.yaml b/plots/box-notched/metadata/bokeh.yaml
index da2bb41dc1..cf024e50b3 100644
--- a/plots/box-notched/metadata/bokeh.yaml
+++ b/plots/box-notched/metadata/bokeh.yaml
@@ -26,3 +26,177 @@ review:
     scale
   - No legend mapping colors to departments (though categories on x-axis partially
     compensate)
+  image_description: 'The plot displays 5 notched box plots comparing employee performance
+    scores across departments (Engineering, Sales, Marketing, Operations, HR). Each
+    department has a distinct color: blue (Engineering), yellow (Sales), green (Marketing),
+    orange (Operations), and purple (HR). The notches are clearly rendered as triangular
+    indentations at the median level, providing visual confidence intervals. Whiskers
+    extend from each box with horizontal caps. Outliers are shown as hollow circles
+    with colored borders matching their respective departments - Sales shows several
+    outliers (~40, 48-50, 95-100), Operations has outliers (~42, ~107), and HR has
+    one outlier (~51). The title correctly displays "box-notched · bokeh · pyplots.ai".
+    The Y-axis is labeled "Performance Score" and X-axis "Department". The background
+    is a subtle light gray (#FAFAFA) with dashed horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24-26pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Boxes well-sized, outliers visible at size 22, notches clearly defined
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors (blue, yellow, green, orange, purple) are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Performance Score", "Department") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha 0.3 is good, but no legend present (colors
+          not labeled)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches at 95% CI, median, quartiles, whiskers at 1.5×IQR, outliers
+          shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type (categories on axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "box-notched · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows different distributions, varying spreads, outliers present;
+          minor: could show more dramatic median differences for notch comparison'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores (0-100) across departments is a realistic
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores in 40-100 range are realistic; some outliers exceed
+          100 which is slightly unrealistic for a 0-100 scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but there's an unused variable 'xs' (lines 135-146)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of ColumnDataSource for outliers, quad/patch/segment for
+          custom rendering, but could leverage more Bokeh-specific features like HoverTool
+          for interactivity
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/highcharts.yaml b/plots/box-notched/metadata/highcharts.yaml
index 39fb574ccd..2cafcf3740 100644
--- a/plots/box-notched/metadata/highcharts.yaml
+++ b/plots/box-notched/metadata/highcharts.yaml
@@ -23,3 +23,174 @@ review:
   - Notches represented as error bars rather than actual notched box shape (Highcharts
     limitation, acceptable workaround)
   - Legend text slightly small relative to the 4800x2700 canvas
+  image_description: 'The plot shows a notched box plot visualization with four treatment
+    groups: Placebo, Low Dose, Medium Dose, and High Dose arranged along the x-axis.
+    Each box uses a different color (blue for Placebo, yellow for Low Dose, purple
+    for Medium Dose, cyan for High Dose). The y-axis shows "Response Score" ranging
+    from approximately 10 to 108. Each box displays the standard box plot elements
+    (median as black line, Q1/Q3 as box boundaries, whiskers). Red horizontal error
+    bars overlay each box representing the 95% CI (notch) for median comparison. Red
+    circular markers indicate outliers - visible for Placebo (at ~15 and ~85), Low
+    Dose (at ~29, ~35, ~74, ~79), and Medium Dose (at ~30 and ~95). The title reads
+    "box-notched · highcharts · pyplots.ai" with a subtitle explaining the error bars.
+    A legend in the upper right shows Response Distribution, 95% CI (Notch), and Outliers.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are readable, though tick labels could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots and error bars clearly visible; outlier markers could be
+          slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, cyan)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, though some empty space at bottom of chart
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Score" on y-axis, "Treatment Group" label present (shown
+          in x-axis title)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend well-placed but slightly small
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot with notch representation via error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows median, quartiles, whiskers, outliers, and 95% CI; notches
+          shown as error bars rather than traditional notch shape
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data including outliers visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three series
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format, but the subtitle draws some attention away from the main title
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers, different distributions, increasing medians across
+          dose groups
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial dose-response scenario is excellent and realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response scores 10-108 are reasonable for a clinical metric
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → calculations → config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts boxplot with error bars and scatter overlay; good
+          use of multiple series types
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/letsplot.yaml b/plots/box-notched/metadata/letsplot.yaml
index d80669917e..2b9bcdfc01 100644
--- a/plots/box-notched/metadata/letsplot.yaml
+++ b/plots/box-notched/metadata/letsplot.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Outlier points use gray color instead of matching box fill colors, reducing visual
     cohesion
+  image_description: 'The plot displays a notched box plot comparing annual salaries
+    across 5 departments (Engineering, Marketing, Sales, Finance, Operations). Each
+    department has a distinctly colored box: Engineering (blue), Marketing (yellow),
+    Sales (green), Finance (red/coral), and Operations (purple). The boxes show clear
+    notches around the medians representing confidence intervals. Outliers are displayed
+    as gray circular points beyond the whiskers - notably visible in Marketing (high
+    outliers around $120-125K), Sales (both high outliers at $130-145K and low outliers
+    around $28-30K), Engineering (one low outlier ~$64K), and Finance (one low outlier
+    ~$55K). The y-axis shows "Annual Salary (USD)" ranging from ~30,000 to 150,000,
+    and the x-axis shows "Department". The title "box-notched · letsplot · pyplots.ai"
+    is centered at the top. The plot uses a clean minimal theme with light grid lines
+    on the y-axis only.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; department names well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots are well-sized, outliers visible with good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors that work for colorblind viewers (blue, yellow, green,
+          red, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, well-balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Annual Salary (USD)", X-axis labeled "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate; legend hidden (acceptable since x-axis
+          labels identify categories) but minor deduction for not showing legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches present, outliers shown, whiskers at 1.5×IQR, different colors
+          per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers from ~28K to ~145K
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Categories clearly identified via x-axis labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "box-notched · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions: Engineering/Finance with overlapping
+          notches, outliers in multiple directions, different spreads'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department salary comparison is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary values are realistic ($28K-$145K range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot2 grammar correctly with geom_boxplot(notch=True), but
+          could leverage more lets-plot specific features like tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/matplotlib.yaml b/plots/box-notched/metadata/matplotlib.yaml
index 0d353cd6fc..5c8f0a6b63 100644
--- a/plots/box-notched/metadata/matplotlib.yaml
+++ b/plots/box-notched/metadata/matplotlib.yaml
@@ -24,3 +24,178 @@ review:
   - Colorblind-safe palette starting with Python blue
   weaknesses:
   - Axis labels could include units for full clarity (e.g., Performance Score (0-100))
+  image_description: 'The plot displays 5 notched box plots representing employee
+    performance scores across departments (Engineering, Sales, Marketing, Support,
+    HR). Each box has a distinct color: Engineering (Python blue #306998), Sales (yellow),
+    Marketing (teal/cyan), Support (coral/red), and HR (light green). The notches
+    around the medians are clearly visible, showing the 95% confidence intervals.
+    The title "box-notched · matplotlib · pyplots.ai" is at the top. Outliers are
+    shown as gray circular points - visible for Sales (2 outliers ~43-52), Support
+    (4 outliers including low ones at ~32-35 and high ones ~95-100). The y-axis shows
+    "Performance Score" ranging from ~30-100, and x-axis shows "Department". A helpful
+    annotation with an arrow points to Engineering''s box explaining that non-overlapping
+    notches suggest significant difference in medians. Grid lines are subtle on the
+    y-axis only.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, annotation well-placed
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths, notches, whiskers, and outliers all clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units ("Performance Score" could be "Performance
+          Score (0-100)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at 0.3 alpha, y-axis only which is good; no legend
+          but category labels on x-axis suffice
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches present, median/quartiles/whiskers shown, outliers displayed,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from ~30 to 100, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Department labels on x-axis are clear and accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "box-notched · matplotlib · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions (Engineering tight/high, Sales wide,
+          Marketing similar to Sales for overlapping notches, Support with outliers),
+          demonstrates the purpose of notched boxplots
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores across departments is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 0-100 with realistic distributions (means ~68-82)
+          and appropriate sample sizes (40-60 per group)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses tick_labels instead of deprecated labels parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses matplotlib's boxplot with notch=True, patch_artist for custom
+          colors, detailed flierprops/medianprops/whiskerprops customization, annotation
+          with arrow
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/plotly.yaml b/plots/box-notched/metadata/plotly.yaml
index 944c86bf73..b13b16d1c2 100644
--- a/plots/box-notched/metadata/plotly.yaml
+++ b/plots/box-notched/metadata/plotly.yaml
@@ -25,3 +25,178 @@ review:
     legend would improve accessibility
   - Could leverage Plotly interactive features more (custom hovertemplate showing
     statistics)
+  image_description: 'The plot displays 5 notched box plots showing employee salary
+    distributions across departments (Engineering, Marketing, Sales, HR, Finance).
+    Each department has a distinct color: Engineering (steel blue #306998), Marketing
+    (yellow #FFD43B), Sales (green #2CA02C), HR (purple #9467BD), and Finance (pink
+    #E377C2). The notches around the medians are clearly visible and properly shaped.
+    Outliers appear as individual circular markers beyond the whiskers - notably visible
+    for Engineering (high outliers around $145k-$150k and low around $42k), Sales
+    (high at $135k, low at $25k-$28k), HR (low around $33k), and Finance (high at
+    $140k, low at $45k). The title "box-notched · plotly · pyplots.ai" is centered
+    at the top. Y-axis displays "Annual Salary (USD)" with currency formatting ($20,000
+    to $140,000+). X-axis shows "Department" with all 5 department names clearly readable.
+    Background uses plotly_white template with subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 20pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box plots appropriately sized, outlier markers visible (size=10)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors with good luminance variation, colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor excess whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Annual Salary (USD)" with currency unit, X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend is hidden (showlegend=False)
+          - department names on x-axis serve this purpose adequately but no legend
+          present
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches present, outliers shown, whiskers at 1.5*IQR (Plotly default),
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis labels serve as legend, correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "box-notched · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions (Engineering high/wide, HR narrow/low),
+          multiple outliers, varying sample sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Salary distribution by department is a realistic scenario, though
+          some outliers are slightly exaggerated
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary values realistic ($25k-$150k range appropriate for corporate
+          salaries)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Plotly API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses go.Box with notched=True, but doesn't leverage Plotly's interactive
+          hover features in any meaningful way (static output). Could use hovertemplate
+          for enhanced tooltips.
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/plotnine.yaml b/plots/box-notched/metadata/plotnine.yaml
index c12ac94a29..3695c6df01 100644
--- a/plots/box-notched/metadata/plotnine.yaml
+++ b/plots/box-notched/metadata/plotnine.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Legend disabled - consider keeping for accessibility if plot is viewed without
     context
+  image_description: 'The plot displays a notched box plot showing Productivity Score
+    (points) across 5 departments: Engineering, Marketing, Sales, Support, and HR.
+    Each department has a distinct color: Engineering (Python blue #306998), Marketing
+    (yellow #FFD43B), Sales (teal #4A90A4), Support (purple #8B5A8C), and HR (muted
+    green #6B8E6B). The notches around medians are clearly visible on all boxes, with
+    varying IQR widths showing different distributions. Outliers are displayed as
+    individual points - Engineering has outliers at both high (~102) and low (~51)
+    ends, while HR shows a low outlier (~40). The title "box-notched · plotnine ·
+    pyplots.ai" appears at the top. Axis labels are "Department" (x-axis) and "Productivity
+    Score (points)" (y-axis). The plot uses a minimal theme with subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title ~24pt, axis labels ~20pt, tick labels ~16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, department labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths appropriate, notches clearly visible, outliers well-sized
+          with alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors distinguishable, though yellow/teal may appear too similar
+          for some colorblind types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "(points)", X-axis "Department" is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend shown (legend_position="none"), but for this plot legend
+          is redundant since x-axis labels identify categories
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches present, median/quartiles/whiskers at 1.5*IQR, outliers shown,
+          different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (colors mapped directly to x-axis categories)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{spec-id} · {library} · pyplots.ai" format exactly
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows outliers (Engineering, HR), varying distributions (Support
+          tight, Marketing wide), different medians for comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee productivity scores across departments is a relatable, real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores in 40-100 range are realistic for performance metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Proper ggplot2 grammar with aes(), geom_boxplot(notch=True), scale_fill_manual,
+          theme_minimal, layered composition
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/pygal.yaml b/plots/box-notched/metadata/pygal.yaml
index 76c165f349..dbcbdd4c56 100644
--- a/plots/box-notched/metadata/pygal.yaml
+++ b/plots/box-notched/metadata/pygal.yaml
@@ -26,3 +26,183 @@ review:
     violating the KISS structure requirement
   - Legend at bottom is somewhat redundant with x-axis category labels
   - Minor layout issue with extra whitespace on right side
+  image_description: 'The plot displays 5 notched box plots comparing server response
+    times across different configurations (Baseline, Config A, Config B, Config C,
+    Config D). Each box has a distinctive color: blue for Baseline, yellow for Config
+    A, green for Config B, orange for Config C, and purple for Config D. The boxes
+    show clear notches around the median line, with whiskers extending to show the
+    data range, and outliers displayed as hollow circles. The title "box-notched ·
+    pygal · pyplots.ai" appears at the top. The Y-axis shows "Response Time (ms)"
+    ranging from ~20 to ~220, and the X-axis shows "Server Configuration" with category
+    labels. A legend appears at the bottom identifying each configuration by color.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all readable. Font sizes are
+          appropriate for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Category labels are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots are clearly visible with good sizing. Outlier circles could
+          be slightly larger for better visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-friendly palette with distinct hues (blue, yellow,
+          green, orange, purple).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though there's some extra whitespace on
+          the right side.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)" and "Server
+          Configuration".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis grid lines are subtle and helpful. Legend at bottom is functional
+          but slightly redundant with x-axis labels.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot type implemented.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: notches at 95% CI, median lines,
+          quartiles, whiskers at 1.5×IQR, outliers as individual points, different
+          colors per category.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range appropriately shows all data including outliers.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 configurations.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows required format: "box-notched · pygal · pyplots.ai".'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent demonstration of notched box plot features: shows overlapping
+          notches (Baseline/Config B), non-overlapping notches (Config A/Config C
+          vs Baseline), outliers in multiple categories, varying spreads.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time comparison is a realistic, relatable scenario
+          for benchmarking configurations.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times in 20-220ms range are realistic for server performance
+          metrics.
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Code uses functions (`calc_box_stats`, `y_to_svg`) which violates
+          the KISS principle of no functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pygal, Style, ET, cairosvg).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creative use of pygal as a base with custom SVG manipulation to achieve
+          notched boxes, since pygal doesn't natively support notched box plots. Uses
+          pygal's Style system and chart configuration. The SVG manipulation approach
+          is clever but could be considered a workaround rather than using distinctive
+          pygal features.
+  verdict: APPROVED
diff --git a/plots/box-notched/metadata/seaborn.yaml b/plots/box-notched/metadata/seaborn.yaml
index 0c9f5c5ebf..c84a03cd25 100644
--- a/plots/box-notched/metadata/seaborn.yaml
+++ b/plots/box-notched/metadata/seaborn.yaml
@@ -22,3 +22,172 @@ review:
   weaknesses:
   - 'Grid/legend scoring ambiguity: no legend present but this is correct for the
     plot type'
+  image_description: 'The plot displays a notched box plot showing salary distributions
+    across 5 departments (Engineering, Marketing, Sales, Support, HR). Each box uses
+    a distinct color: Engineering (blue #306998), Marketing (yellow #FFD43B), Sales
+    (teal #4ECDC4), Support (coral #E07A5F), and HR (sage green #81B29A). The notches
+    around each median are clearly visible, with the Sales box having particularly
+    wide notches due to its larger spread. The title "box-notched · seaborn · pyplots.ai"
+    appears at the top in bold. The y-axis shows "Annual Salary ($)" with values formatted
+    as $40K-$160K, and the x-axis shows "Department". A subtle grid with dashed lines
+    helps read values. Outliers are visible as gray circles, particularly prominent
+    in Sales (3 high outliers around $130K-$155K) and a few in Marketing and Support.
+    An italic annotation in the bottom-right explains the meaning of notches. The
+    layout is well-balanced with good margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Box widths well-adapted, notches clearly visible, outliers appropriately
+          sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Custom palette with good contrast, no red-green confusion issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, annotation positioned nicely
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has unit "Annual Salary ($)", X-axis appropriately labeled
+          "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha 0.3, dashed style appropriate. No legend
+          needed as categories are on x-axis
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct notched box plot with notch=True
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Notches present, median/quartiles/whiskers visible, outliers shown
+          as individual points, different colors per category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible including high outliers in Sales
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, categories clear on x-axis
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "box-notched · seaborn · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions (tight Support vs wide Sales), outliers
+          present, overlapping notches (Marketing/HR) vs non-overlapping (Engineering/Support)
+          demonstrate statistical significance testing. Minor deduction: could show
+          more extreme notch overlap cases'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary distributions across departments is a perfect real-world scenario
+          for notched boxplots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary ranges ($40K-$160K) are realistic for the departments shown
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (plt, np, pd, sns)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/altair.yaml b/plots/bubble-basic/metadata/altair.yaml
index afb5e702ca..98e85d689a 100644
--- a/plots/bubble-basic/metadata/altair.yaml
+++ b/plots/bubble-basic/metadata/altair.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/bokeh.yaml b/plots/bubble-basic/metadata/bokeh.yaml
index 1811ec9868..8af55cb026 100644
--- a/plots/bubble-basic/metadata/bokeh.yaml
+++ b/plots/bubble-basic/metadata/bokeh.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/highcharts.yaml b/plots/bubble-basic/metadata/highcharts.yaml
index 5163f257ba..d7814dca3a 100644
--- a/plots/bubble-basic/metadata/highcharts.yaml
+++ b/plots/bubble-basic/metadata/highcharts.yaml
@@ -25,3 +25,15 @@ review:
     intervals
   - Bubble size legend appears to be cut off or not fully visible at the bottom
   - No categorical color dimension shown (optional per spec but would enhance demonstration)
+  image_description: The plot displays a bubble chart with 30 data points representing
+    tech company comparisons. The bubbles are rendered in a muted blue color (#306998)
+    with semi-transparent fill. The title "bubble-basic · highcharts · pyplots.ai"
+    appears at the top in bold, followed by a subtitle "Bubble size represents Market
+    Capitalization". The x-axis shows "Revenue (Billion USD)" ranging from 0 to 660,
+    and the y-axis shows "Growth Rate (%)" ranging from 0 to 54. Bubbles vary in size
+    from small (representing lower market cap) to large (representing higher market
+    cap). The data shows a general negative correlation between revenue and growth
+    rate - smaller companies have higher growth rates while larger companies have
+    lower growth rates. A subtle grid is visible, and there appears to be a legend
+    entry at the bottom (partially visible).
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/letsplot.yaml b/plots/bubble-basic/metadata/letsplot.yaml
index 278d1c3aef..b0c22ab998 100644
--- a/plots/bubble-basic/metadata/letsplot.yaml
+++ b/plots/bubble-basic/metadata/letsplot.yaml
@@ -23,3 +23,177 @@ review:
   - Grid styling could have lower alpha for even more subtle appearance
   - Could leverage more letsplot-specific interactive features given the library strengths
     in this area
+  image_description: The plot displays a bubble chart with a light blue/steel blue
+    color scheme on a white background with subtle dotted grid lines. The x-axis shows
+    "Revenue (Million USD)" ranging from 10 to 200, and the y-axis shows "Growth Rate
+    (%)" ranging from 0 to 30. There are approximately 40 bubbles of varying sizes
+    scattered across the plot, with bubble sizes representing "Market Share (%)" as
+    shown in the legend on the right side. The legend shows five reference bubble
+    sizes for 5, 10, 15, 20, and 25% market share. The title "bubble-basic · letsplot
+    · pyplots.ai" appears at the top left. The bubbles have a transparency (alpha)
+    applied, allowing overlapping bubbles to remain visible. The data shows a general
+    negative correlation between revenue and growth rate, with higher-revenue companies
+    tending to have lower growth rates.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick marks at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Bubbles are visible with good alpha (0.6), though some smaller bubbles
+          could be slightly larger for 40 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998) with varying sizes, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, data well-distributed, legend well-placed
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Revenue (Million USD)" and "Growth Rate
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: 'Grid is subtle but legend title size (18) is good; minor: grid could
+          use lower alpha'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bubble chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=revenue, Y=growth rate, size=market share correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: size legend, transparency, three variables
+          visualized'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Market Share (%)" with appropriate size references
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bubble-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows range of bubble sizes and data distribution, good variation
+          in all three dimensions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market analysis scenario is real and comprehensible (revenue vs growth
+          vs market share)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue 10-200M USD and growth 0-30% are realistic; market share
+          values are reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with theme_minimal, but could leverage
+          more letsplot-specific features like tooltips or interactive elements
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/matplotlib.yaml b/plots/bubble-basic/metadata/matplotlib.yaml
index 621eecc89d..81680a904b 100644
--- a/plots/bubble-basic/metadata/matplotlib.yaml
+++ b/plots/bubble-basic/metadata/matplotlib.yaml
@@ -24,3 +24,178 @@ review:
     comments mentioning company metrics
   - Size distribution appears somewhat random rather than showing interesting patterns
     across the X-Y space
+  image_description: The plot shows a bubble chart with approximately 50 data points
+    displayed as semi-transparent blue circles (#306998) with darker blue edges (#1a3d5c).
+    The bubbles vary significantly in size, representing a third variable. The X axis
+    ranges from ~10 to 100, labeled "X Value", and the Y axis ranges from ~10 to 100,
+    labeled "Y Value". There is a clear positive correlation between X and Y values.
+    A size legend in the upper left corner shows three reference bubble sizes (100,
+    250, 500) with title "Size Value". The title reads "bubble-basic · matplotlib
+    · pyplots.ai" in the correct format. A subtle dashed grid (alpha ~0.3) is visible
+    in the background. The bubbles use alpha=0.6 transparency which allows overlapping
+    bubbles to be distinguished.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlaps, legend well-positioned in upper left away from
+          data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubble sizes are well-adapted with good alpha (0.6) for 50 points,
+          though some bubble overlap in the lower-left region makes individual bubbles
+          harder to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue) with darker edge, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, well-balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Value", "Y Value") but generic without
+          units or context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), size legend clearly shows bubble
+          scale reference
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bubble chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned, size variable mapped to bubble size
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 variables, transparency for overlap
+          handling, size legend included'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Size legend accurately represents bubble scaling with three reference
+          values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bubble-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in all three variables with positive X-Y correlation.
+          Could show more diverse size distribution across the X-Y space
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Data is labeled as "company metrics" in comments but axis labels
+          are generic. Plausible scenario but could be more specific
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible (10-100 range for X/Y, 50-500 for size)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ax.scatter with proper size scaling and edge colors. Basic but
+          correct matplotlib usage. Could leverage colormap or additional matplotlib
+          features
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/plotly.yaml b/plots/bubble-basic/metadata/plotly.yaml
index 1d617ccd21..51d6fe5130 100644
--- a/plots/bubble-basic/metadata/plotly.yaml
+++ b/plots/bubble-basic/metadata/plotly.yaml
@@ -27,3 +27,177 @@ review:
     capabilities
   - Grid lines at 0.1 alpha are barely visible - could be slightly more prominent
     (0.2-0.3)
+  image_description: The plot displays a bubble chart with 40 data points representing
+    company performance metrics. The bubbles are rendered in a muted blue color (#306998)
+    with darker blue outlines and 60% opacity. The x-axis shows "Revenue ($ millions)"
+    ranging from approximately 20 to 80, and the y-axis shows "Growth Rate (%)" ranging
+    from 0 to about 42. Bubble sizes vary significantly, representing market share
+    values. A size legend in the upper right shows three reference bubbles for 5%,
+    15%, and 30% market share with a white background and subtle border. The title
+    "bubble-basic · plotly · pyplots.ai" is centered at the top. The background uses
+    a clean white template with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all excellently
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, bubbles have good transparency for
+          overlapping areas
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubble sizes well-adapted for 40 data points, good alpha of 0.6,
+          though a few central bubbles cluster
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Perfect layout with appropriate margins, legend well-positioned outside
+          plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Revenue ($ millions)" and "Growth
+          Rate (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle (good), but legend could use better visual differentiation
+          between size levels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bubble chart with three dimensions mapped
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Revenue, Y=Growth Rate, Size=Market Share correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: transparency for overlaps, size legend,
+          area-based scaling'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Size legend with "Market Share" title and representative values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variety of bubble sizes and positions, though size distribution
+          could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent business scenario: companies by revenue vs growth with
+          market share - directly from spec applications'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue 20-80M, Growth 0-42%, Market Share 2-35% - all realistic
+          business values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses graph_objects correctly with hover templates, but could leverage
+          more Plotly-specific features like color gradients or annotations
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/plotnine.yaml b/plots/bubble-basic/metadata/plotnine.yaml
index de1124b5ae..ff2ddadfb4 100644
--- a/plots/bubble-basic/metadata/plotnine.yaml
+++ b/plots/bubble-basic/metadata/plotnine.yaml
@@ -25,3 +25,176 @@ review:
     a more realistic scenario
   - Could showcase more plotnine features like adding a color aesthetic for a fourth
     dimension as suggested in spec notes
+  image_description: The plot displays a bubble chart with approximately 50 data points
+    on a clean minimal theme background. The x-axis ranges from 6 to 14 (labeled "X
+    Value") and the y-axis from approximately 7 to 14 (labeled "Y Value"). All bubbles
+    are rendered in a consistent blue color (#306998) with transparency (alpha ~0.6).
+    Bubble sizes vary noticeably, representing the third dimension. A size legend
+    on the right side shows three reference bubbles for sizes 25, 50, and 75. The
+    title "bubble-basic · plotnine · pyplots.ai" is displayed at the top. The overall
+    layout is well-proportioned with a 16:9 aspect ratio. The data shows a positive
+    correlation between X and Y values with good variation in bubble sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, axis text at 16pt, legend text
+          at 14-18pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, bubbles have good alpha transparency
+          to handle visual overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Bubbles are well-sized with scale_size_area(max_size=20), good visibility.
+          Minor deduction: some small bubbles could be slightly more prominent'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good proportions, minimal background with theme_minimal(). Small
+          deduction: legend could be positioned slightly better'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Value" and "Y Value" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle from theme_minimal(), legend is present and clear
+          but positioned on plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bubble chart using geom_point with size aesthetic
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, and size correctly mapped to aesthetics
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: size legend, transparency (alpha=0.6),
+          scale_size_area for area-based scaling'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Size legend correctly shows scale with values 25, 50, 75
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bubble-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: 'Shows good variation in all three dimensions (x, y, size). Minor
+          deduction: could show more extreme size variation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible synthetic data with correlation between x and y, generic
+          but acceptable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are reasonable and sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_point and scale_size_area, theme_minimal,
+          element_text customization. This is standard plotnine usage but doesn't
+          showcase advanced features like faceting or additional aesthetics
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/pygal.yaml b/plots/bubble-basic/metadata/pygal.yaml
index ee4b39da53..26aaa79dd8 100644
--- a/plots/bubble-basic/metadata/pygal.yaml
+++ b/plots/bubble-basic/metadata/pygal.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/bubble-basic/metadata/seaborn.yaml b/plots/bubble-basic/metadata/seaborn.yaml
index 926f7b62ff..499c905da0 100644
--- a/plots/bubble-basic/metadata/seaborn.yaml
+++ b/plots/bubble-basic/metadata/seaborn.yaml
@@ -25,3 +25,173 @@ review:
   - Could leverage seaborn hue parameter to add color as optional fourth dimension
     per spec suggestion
   - Data context is synthetic without a compelling real-world scenario
+  image_description: 'The plot displays a bubble chart with 50 semi-transparent blue
+    bubbles (#306998) on a white background with subtle dashed grid lines. The bubbles
+    vary in size from small (Size: 20) to large (Size: 80), representing a third variable.
+    The X-axis is labeled "X Value" and ranges from approximately 20 to 80, while
+    the Y-axis is labeled "Y Value" and ranges from approximately 15 to 70. The data
+    shows a clear positive correlation between X and Y values. A size legend titled
+    "Bubble Size" appears in the upper-left corner with three reference bubbles (20,
+    50, 80). The title "bubble-basic · seaborn · pyplots.ai" is displayed at the top
+    in the correct format.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-placed
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubbles are well-sized with good alpha (0.6) for 50 points; minor
+          deduction for some dense overlap areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, 16:9 aspect ratio, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are "X Value" and "Y Value" - descriptive but without units
+          or real-world context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle with alpha=0.3 and dashed style, legend well-positioned
+          with framealpha
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bubble chart showing three variables (x, y, size)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned with size as third dimension
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: size legend, transparency, proper scaling'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 50 data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Size legend correctly shows scaling from 20 to 80
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "bubble-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied bubble sizes, positive correlation, overlapping bubbles;
+          minor deduction as data could show more diverse size distribution patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Synthetic data with plausible correlation, but generic "X Value/Y
+          Value" rather than real-world scenario like market analysis
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are sensible (X: 20-80, Y: 15-70, Size: 20-100)'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's scatterplot with DataFrame integration and size parameter,
+          but doesn't leverage seaborn's advanced features like hue/style or statistical
+          annotations
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/altair.yaml b/plots/bubble-packed/metadata/altair.yaml
index 048fbd2bb7..4004947360 100644
--- a/plots/bubble-packed/metadata/altair.yaml
+++ b/plots/bubble-packed/metadata/altair.yaml
@@ -28,3 +28,187 @@ review:
     would help)
   - The optional grouping feature from the spec could have been demonstrated to show
     clustering capability
+  image_description: The plot displays a packed bubble chart showing department budget
+    allocation across 15 departments. Each department is represented by a circle whose
+    size corresponds to its budget value. The circles are packed together without
+    overlap using a physics-based simulation. The color scheme uses a colorblind-safe
+    palette with Python Blue (#306998), Yellow (#FFD43B), Teal (#4A90A4), Sage Green
+    (#7B9E89), and Coral (#E07A5F). The largest circles (Engineering $850K, R&D $750K,
+    Sales $680K) are prominently displayed near the center, while smaller departments
+    are positioned around them. Labels with department names and budget values (e.g.,
+    "Engineering $850K") are displayed inside the larger bubbles in white text. The
+    title "Department Budget Allocation · bubble-packed · altair · pyplots.ai" appears
+    at the top. No axes are visible as this is a packed bubble chart where position
+    has no meaning - only size matters.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt is excellent, labels inside bubbles are readable in
+          white bold text. Slightly docked because some smaller bubbles lack visible
+          labels.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping circles or text elements. The packing algorithm works
+          well.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circle sizes are well differentiated, ranging from small to large.
+          All 15 bubbles are clearly visible with good spacing.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses a colorblind-safe palette avoiding pure red-green distinctions.
+          Good contrast throughout.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout with circles well-distributed. Slightly asymmetric
+          packing but acceptable.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for packed bubble charts (axes intentionally hidden, which is
+          correct for this plot type).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend provided to explain the color encoding. While tooltips
+          are available in HTML, the static PNG lacks color explanation.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart with physics simulation for circle packing.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly mapped to budget values using sqrt for area-proportional
+          scaling as specified.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: labels inside large circles, tooltips,
+          no overlap, color encoding.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, from $150K (Legal) to $850K (Engineering).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (color is cyclic, not meaningful grouping - acceptable per spec
+          where grouping is optional).
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Department Budget Allocation · bubble-packed ·
+          altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows good range of bubble sizes. However, optional grouping feature
+          from spec not demonstrated.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budget allocation is a perfect real-world scenario for
+          packed bubble charts.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values ($150K-$850K) are realistic for department budgets.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Script-style code without functions/classes. Clear imports → data
+          → plot → save structure.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct for Altair).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative approach with layered marks, tooltips,
+          and mark_text. However, the circle packing is done manually in Python rather
+          than leveraging any Altair-specific features. Altair is primarily used for
+          rendering.
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/bokeh.yaml b/plots/bubble-packed/metadata/bokeh.yaml
index d19ae8d5ca..f5a834cc05 100644
--- a/plots/bubble-packed/metadata/bokeh.yaml
+++ b/plots/bubble-packed/metadata/bokeh.yaml
@@ -22,3 +22,166 @@ review:
   - White text on yellow circles has lower contrast than on blue circles
   - Color palette repeats without logical grouping meaning
   - No legend or color key explaining what colors represent
+  image_description: 'The plot displays a packed bubble chart with 15 circles representing
+    department budgets. Circle sizes scale by budget amount (from $6M Security to
+    $45M Engineering). Colors alternate between Python blue tones (#306998, #4B8BBE,
+    #3776AB) and yellow tones (#FFD43B, #FFE873). Larger circles display labels with
+    department names and budget values in white text (e.g., ''Engineering $45M'',
+    ''R&D $42M''). The title ''Department Budgets · bubble-packed · bokeh · pyplots.ai''
+    appears at the top. Circles are tightly packed without overlap on a light gray
+    background (#f8f9fa), with white outlines separating them. Axes are hidden as
+    appropriate for this chart type.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 36pt is excellent, labels at 24pt/20pt are readable on larger
+          circles, but some yellow circles have low contrast with white text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All circles properly separated with 10px padding, no text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circle sizes well-adapted, smallest (Security $6M) still clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, but could benefit from more
+          distinct hues for easier differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent packing, circles centered in frame with good proportions
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed, tooltips provide legend functionality
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly represents value (area-scaled)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Circle packing simulation, labels, tooltips all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 departments visible and readable
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels and tooltips correctly show department and budget
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format 'Department Budgets · bubble-packed · bokeh ·
+          pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in bubble sizes well, but no grouping demonstrated
+          (optional per spec)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budgets is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values ($6M-$45M) are plausible, though spread could be wider
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → simulation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (extra file, but not wrong)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, LabelSet, hover tooltips; could leverage
+          more Bokeh features like CustomJS callbacks
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/highcharts.yaml b/plots/bubble-packed/metadata/highcharts.yaml
index 891b024572..a9a0f65108 100644
--- a/plots/bubble-packed/metadata/highcharts.yaml
+++ b/plots/bubble-packed/metadata/highcharts.yaml
@@ -26,3 +26,176 @@ review:
   - Title includes extra descriptor which deviates slightly from the pure spec-id
     format
   - Some smaller bubbles have no visible labels even though they are part of the visualization
+  image_description: 'The plot displays a packed bubble chart showing market sectors
+    with circles of varying sizes representing market value in billions of dollars.
+    The visualization uses 5 distinct colors: blue (Technology sector with Software,
+    Hardware, Cloud Services, Semiconductors, Cybersecurity), yellow (Finance sector
+    with Banking, Insurance, Asset Management, Fintech), purple (Healthcare with Pharmaceuticals,
+    Medical Devices, Biotech, Healthcare Services), cyan (Energy with Oil & Gas, Renewables,
+    Utilities), and brown/tan (Consumer with Retail, Food & Beverage, Automotive,
+    Entertainment). The largest bubbles are Software (850B), Banking (720B), and Cloud
+    Services (680B). Labels appear inside larger bubbles with white text and contrast
+    outline. The title follows the correct format, subtitle explains the size encoding,
+    and a legend at the bottom identifies each sector.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable, though some smaller bubble
+          labels are slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Bubbles are properly packed without overlap, labels filtered to show
+          only on larger bubbles
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubble sizes are well-proportioned, though the central packing leaves
+          large white space around edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan, brown)
+          avoiding red-green
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Bubbles centered but significant whitespace on all sides
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for packed bubble, but subtitle explains size meaning with units
+          ($B)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend present and clear, no grid needed for this chart type
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart using physics simulation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly represents value, grouping by sector
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has labels, values, groups, size encoding, packing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the packed layout
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 sectors
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title is "bubble-packed · Market Sectors · highcharts · pyplots.ai"
+          - includes extra descriptor but follows pattern
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows grouping, varying sizes, labels; could show more size variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market sector/industry data is a perfect real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in billions are reasonable for market caps
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Creates both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts packedbubble type with layoutAlgorithm, dataLabels
+          filter, but doesn't leverage advanced interactivity features in the static
+          output
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/letsplot.yaml b/plots/bubble-packed/metadata/letsplot.yaml
index 5fe4cf05d0..fef4525ded 100644
--- a/plots/bubble-packed/metadata/letsplot.yaml
+++ b/plots/bubble-packed/metadata/letsplot.yaml
@@ -26,3 +26,174 @@ review:
   - Some smaller bubbles (QA, Legal, Security) have labels that are slightly cramped
   - Does not utilize lets-plot interactive tooltip features which would be valuable
     for this chart type
+  image_description: 'The plot displays a packed bubble chart showing department budget
+    allocation across 15 departments. Bubbles are colored by three divisions: Tech
+    (yellow/gold #FFD43B), Business (teal/cyan #4ECDC4), and Operations (dark blue
+    #306998). The largest bubble is "Engineering" at the bottom center, followed by
+    "R&D", "Product", "Sales", and "Marketing". Each bubble contains a white bold
+    label inside. The bubbles are tightly packed without overlap, demonstrating the
+    force-directed packing algorithm. The title "Department Budget Allocation · bubble-packed
+    · letsplot · pyplots.ai" appears at the top center. A legend on the right shows
+    the three divisions. The background is clean white (theme_void).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is clear at 24pt, labels inside bubbles are readable in white
+          bold, though some smaller bubbles (QA, Legal) have slightly cramped text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Bubbles do not overlap; force simulation works correctly
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubble sizes well-differentiated, though the smallest bubbles (Legal,
+          QA) are on the edge of readability
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow, teal, and blue are colorblind-safe and have good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of space, bubbles centered with appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for packed bubble chart (uses theme_void appropriately)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed on right; however, grid is not applicable (theme_void)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart with physics simulation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly maps to value, color maps to group
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has labels, values, groups, and force-directed packing as per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 departments visible with appropriate sizing
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Division" with Tech, Business, Operations
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Department Budget Allocation · bubble-packed ·
+          letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows range of sizes from small (Legal, HR) to large (Engineering),
+          grouping by division; could show more variation in group sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budget allocation is a perfect real-world application
+          matching the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (22-85) are reasonable budget units; could benefit from explicit
+          units
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → simulation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set for both data and positions
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as plot.png but also plot.html (minor: both outputs correct)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, theme_void, coord_fixed, scale_size, scale_color_manual;
+          however, does not leverage lets-plot's interactive tooltips which would
+          enhance this visualization
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/matplotlib.yaml b/plots/bubble-packed/metadata/matplotlib.yaml
index 930e088af6..9038a5fd5d 100644
--- a/plots/bubble-packed/metadata/matplotlib.yaml
+++ b/plots/bubble-packed/metadata/matplotlib.yaml
@@ -25,3 +25,167 @@ review:
   - Some smaller bubbles (Legal, Security) are unlabeled and their departments are
     unclear without a legend
   - Customer Support bubble is not visible in the visualization
+  image_description: 'The plot displays a packed bubble chart representing "Department
+    Budget Allocation" with 15 circular bubbles of varying sizes. The largest bubbles
+    are Engineering ($850K), R&D ($750K), Sales ($680K), and Product ($550K). The
+    bubbles are color-coded into four groups: blue (Tech: Engineering, Sales, IT,
+    Product), yellow (Business/Creative: Marketing, R&D, Design, Data Science), teal
+    (Support: Operations, HR, Finance, Customer Support), and sage green (Compliance/Quality:
+    Legal, Security, QA). Larger bubbles display department names in bold white text
+    with values below. The bubbles are tightly packed without overlapping, demonstrating
+    the physics simulation. The title reads "Department Budget Allocation · bubble-packed
+    · matplotlib · pyplots.ai" in bold at the top. The background is white with axes
+    turned off for a clean visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 24pt and clear; labels inside bubbles are readable but some
+          smaller bubbles (HR, QA) have slightly cramped text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent bubble packing with no overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circle sizes well-adapted, proper area-based scaling using sqrt
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct, colorblind-safe colors (blue, yellow, teal, sage)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight asymmetry but natural for packed bubbles
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed; color grouping is implicit and clear
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart with physics simulation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly represents budget values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels, values, color grouping all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 departments visible and readable
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color grouping is intuitive (no explicit legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correctly formatted: "{description} · bubble-packed · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying sizes, groupings, and labels; could show more extreme
+          size variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budgets is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values ($150K-$850K) are realistic for departments
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → packing simulation
+          → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, matplotlib.patches, numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib patches (Circle) and custom physics simulation; could
+          leverage matplotlib.collections for efficiency
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/plotly.yaml b/plots/bubble-packed/metadata/plotly.yaml
index 80eca7238d..4ccf0bb6a1 100644
--- a/plots/bubble-packed/metadata/plotly.yaml
+++ b/plots/bubble-packed/metadata/plotly.yaml
@@ -25,3 +25,185 @@ review:
   - No legend showing total budget or providing a size reference scale
   - Text in smallest bubbles (Admin, Legal, Design) appears slightly cramped
   - Optional grouping feature from spec not demonstrated (though marked optional)
+  image_description: The plot displays a packed bubble chart with 15 circles representing
+    department budget allocations. Each circle is labeled with the department name
+    (bold) and budget value (in $M or $K format). The circles are packed tightly together
+    without overlap, with the largest circles being Engineering ($4.5M), R&D ($3.8M),
+    Sales ($3.2M), and Marketing ($2.8M). Smaller departments like Admin ($450K),
+    Legal ($650K), and Design ($720K) appear as smaller circles. The color palette
+    is diverse - Python blue for R&D, yellow for Engineering, various other colors
+    including pink, orange, teal, brown, and purple. The title "Department Budget
+    Allocation · bubble-packed · plotly · pyplots.ai" appears at the top center in
+    dark gray text. The background is clean white with no grid lines or axes shown.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text is clearly readable. Title is large and prominent. Department
+          names and values are legible inside each bubble with white text that contrasts
+          well against the colored backgrounds.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are properly packed without overlapping. Text is contained
+          within each bubble.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Bubble sizes are well-proportioned to show the data differences.
+          The force simulation effectively packed the circles. Minor: some smaller
+          bubbles (Admin, Legal) have slightly cramped text.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses a colorblind-safe palette with sufficient variation. Python
+          blue and yellow featured prominently, other colors well-differentiated.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with circles centered in the plot area. White space
+          around the packed bubbles is balanced.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for packed bubble chart - correctly hidden as position has no
+          meaning.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend shown. While a legend isn't strictly necessary since labels
+          are on bubbles, a small legend or group indicator could enhance understanding.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart where position has no meaning, only size
+          matters.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly represents budget value, scaled by area (sqrt) as
+          spec requires.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels inside circles, force simulation for packing, color encoding
+          categories.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 departments visible with appropriate size range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels directly on bubbles, no separate legend needed.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{description} · bubble-packed · plotly · pyplots.ai"
+          format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows good variation in circle sizes from $450K to $4.5M (10x range).
+          However, the spec mentions optional grouping which isn't demonstrated.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budget allocation is a perfect, real-world scenario for
+          packed bubble charts.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values are realistic for a medium-to-large company.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → simulation → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with annotations for labels and hover templates for
+          interactivity. The HTML export enables Plotly's interactive features. Could
+          leverage Plotly's animation capabilities for the force simulation or use
+          custom shapes.
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/plotnine.yaml b/plots/bubble-packed/metadata/plotnine.yaml
index f051477b29..40e6306e53 100644
--- a/plots/bubble-packed/metadata/plotnine.yaml
+++ b/plots/bubble-packed/metadata/plotnine.yaml
@@ -24,3 +24,178 @@ review:
     S which loses readability
   - No distinctive plotnine grammar of graphics features used - relies heavily on
     manual polygon construction
+  image_description: 'The plot displays a packed bubble chart with 15 circles representing
+    department budgets. Circles vary in size based on budget value, with Engineering
+    (largest, blue) and R&D (blue) being most prominent, followed by Marketing (yellow/gold),
+    Sales (yellow), and Operations (green). The circles are color-coded by four department
+    groups: Tech (blue #0072B2), Business (yellow/gold #E69F00), Operations (green
+    #009E73), and Support (purple/pink #CC79A7). Labels are displayed inside circles
+    with white bold text - larger circles show full or truncated labels ("Engineerin",
+    "Customer S"), while smallest circles show abbreviated labels. The circles are
+    tightly packed without overlap. Title "bubble-packed · plotnine · pyplots.ai"
+    appears at top center in bold black text. A legend on the right identifies the
+    four department groups.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, legend text clear. Some labels truncated but
+          intentional for small circles. White-on-color labels readable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-packed without overlap, clear spacing between all
+          elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Circle sizes well-differentiated showing value hierarchy, though
+          some smaller circles quite small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Okabe-Ito colorblind-safe palette with good contrast between
+          groups
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of 16:9 space, centered bubble cluster with legend positioned
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for packed bubble chart (no axes), appropriate use of theme_void
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean void theme, legend well-positioned with clear title "Department
+          Group"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart with circles sized by value
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly represents budget value with area-based scaling
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has labels, values for sizing, group coloring, and proper packing
+          algorithm
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 departments visible with appropriate size range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "bubble-packed · plotnine · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety of sizes, group clustering by color, realistic budget
+          distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budgets are a perfect real-world use case for packed bubbles
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values in millions (5-45) are realistic for department spending
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → packing algorithm → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses geom_polygon for circles which is a workaround since plotnine
+          doesn't have native circle/bubble packing. This is creative but not a distinctive
+          plotnine feature.
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/pygal.yaml b/plots/bubble-packed/metadata/pygal.yaml
index fe0dc82749..5bd248bed8 100644
--- a/plots/bubble-packed/metadata/pygal.yaml
+++ b/plots/bubble-packed/metadata/pygal.yaml
@@ -24,3 +24,171 @@ review:
   - Value labels show only dollar amounts without category names - larger circles
     could fit abbreviated labels
   - Some smaller circles have cramped or small value labels
+  image_description: 'The plot displays a packed bubble chart representing department
+    budget allocation across four groups: Technology (blue), Marketing (yellow), Operations
+    (teal/cyan), and Sales (coral/pink). The largest bubble in the center shows "$450K"
+    (Software Development, Technology group). Circles are well-packed without overlap,
+    with sizes proportional to budget values ranging from $90K to $450K. White value
+    labels appear inside larger bubbles. The title "bubble-packed · pygal · pyplots.ai"
+    is at the top center. A horizontal legend at the bottom shows the four group categories
+    with color indicators. The background is white with subtle dark outlines around
+    each circle.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All value labels clearly readable; title and legend text good size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent circle packing with no overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circle sizes well adapted to show value differences
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, teal, coral) are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout, slight asymmetry in packing but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for packed bubble charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart with circles sized by value
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to circle area
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has grouping by color, value labels; tooltips present in SVG (less
+          visible in PNG)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 16 data items visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all four groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows size variation well; grouping visible by color; missing category
+          labels on circles
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department budget allocation is an excellent, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values in $K are realistic, though $450K for Software Development
+          is on the high side for some organizations
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has one function (add_packed_bubbles) but necessary for XML filter
+          approach
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of pygal's XML filter API to add custom SVG elements,
+          leverages native SVG rendering
+  verdict: APPROVED
diff --git a/plots/bubble-packed/metadata/seaborn.yaml b/plots/bubble-packed/metadata/seaborn.yaml
index c3c9cdd646..35c606d274 100644
--- a/plots/bubble-packed/metadata/seaborn.yaml
+++ b/plots/bubble-packed/metadata/seaborn.yaml
@@ -25,3 +25,176 @@ review:
   weaknesses:
   - Seaborn library features are underutilized; most visualization logic is custom
     matplotlib code with seaborn providing only the scatter layer
+  image_description: 'The plot displays a packed bubble chart showing company market
+    values by sector. The chart contains approximately 17 circles of varying sizes
+    packed together without overlap. Colors encode four sectors: Technology (green/teal),
+    Finance (orange/coral), Healthcare (blue/lavender), and Retail (pink). The largest
+    bubbles are Apple, Microsoft, Amazon, and Google. Most company names appear as
+    white bold text inside their respective bubbles. The title "bubble-packed · seaborn
+    · pyplots.ai" appears at the top in black bold text. A legend at the bottom right
+    shows the four sector categories with colored squares. The background is white,
+    and circles have white edge strokes separating them visually.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and most labels are clearly readable. Company names inside
+          bubbles are bold white text. Font sizes are appropriate for the image resolution.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements. All labels are fully readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bubbles are well-sized relative to their values, clearly visible
+          with good alpha (0.9).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Set2 palette is colorblind-safe with good distinction between the
+          four sectors.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of space, circles packed efficiently. Slight asymmetry but
+          overall balanced.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for packed bubble charts (axes are intentionally hidden).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend is well-placed and clearly shows sector
+          categories.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct packed bubble chart with circles representing values.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Circle size correctly represents company market value (scaled by
+          area via sqrt).
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has labels, groups (sectors), color encoding, and packed layout.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data is visible within the plot area.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four sectors with matching colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "bubble-packed · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in sizes across and within sectors. Good range from
+          small (Target, Pfizer) to large (Apple, Amazon).
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market cap data for real companies is a perfect real-world scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in billions USD are plausible, though some specific values
+          may not match current market caps exactly.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → packing algorithm → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, matplotlib, seaborn).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Minor: seaborn 0.13 shows deprecation warnings for some patterns.'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'.
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot with hue/size mapping and sns.set_style, but
+          the core visualization relies heavily on matplotlib patches and custom packing
+          logic. Seaborn's role is somewhat limited to the scatter call and styling.
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/altair.yaml b/plots/bullet-basic/metadata/altair.yaml
index 361f28661f..37ce398268 100644
--- a/plots/bullet-basic/metadata/altair.yaml
+++ b/plots/bullet-basic/metadata/altair.yaml
@@ -26,3 +26,177 @@ review:
   - 'Does not display actual values as text labels on the bars (spec notes: Consider
     adding the actual value as a text label)'
   - Could leverage Altair interactivity features (tooltips showing original values)
+  image_description: 'The plot displays 4 horizontal bullet charts stacked vertically
+    for different KPI metrics: Revenue ($K), Profit ($K), New Customers, and Satisfaction
+    (1-5). Each bullet chart shows three grayscale background bands (Poor in light
+    gray #d9d9d9, Satisfactory in medium gray #bdbdbd, Good in dark gray #969696)
+    representing qualitative performance ranges. The actual performance value is shown
+    as a blue bar (Python Blue #306998), and the target is marked with a thin black
+    vertical tick mark. The title "bullet-basic · altair · pyplots.ai" appears at
+    the top center in large font. The x-axis is labeled "% of Maximum" ranging from
+    0 to ~108. The y-axis shows the metric names with appropriate units. A legend
+    at the bottom identifies the three performance bands.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title ~28pt, axis labels ~18pt, all clearly readable. Slightly smaller
+          than ideal but still very good.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars and target markers are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Grayscale bands are colorblind-safe; blue actual bar provides good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though vertical spacing between bullets is quite
+          large
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "% of Maximum" which is descriptive but no unit; Y-axis
+          metrics have units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well placed at bottom; no grid visible (acceptable for
+          bullet charts)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart type with all required components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values as bars, targets as markers, ranges as background bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has actual bar, target marker, qualitative range bands per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows all data clearly with scale extending to 110%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Poor/Satisfactory/Good bands
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `bullet-basic · altair · pyplots.ai`
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 metrics with varied actual/target relationships (above target,
+          below target), different scale normalizations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business KPIs (Revenue, Profit, Customers, Satisfaction) are realistic
+          dashboard metrics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are realistic: Revenue $275K, Profit $85K, 320 customers,
+          4.2/5 satisfaction'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → dataframes → charts → layer → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layering (alt.layer), mark_bar with x/x2 for ranges,
+          mark_tick for targets. Could have used tooltips or selection for more Altair-specific
+          interactivity.
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/bokeh.yaml b/plots/bullet-basic/metadata/bokeh.yaml
index ec9eba9f30..3cec0d787a 100644
--- a/plots/bullet-basic/metadata/bokeh.yaml
+++ b/plots/bullet-basic/metadata/bokeh.yaml
@@ -24,3 +24,173 @@ review:
   weaknesses:
   - Missing legend to explain the grayscale bands (poor/satisfactory/good)
   - Could use ColumnDataSource for more idiomatic Bokeh code
+  image_description: 'The plot displays 5 horizontal bullet charts stacked vertically
+    for sales performance metrics: Revenue, Profit, Orders, Customers, and Satisfaction.
+    Each bullet chart shows three grayscale background bands (light gray for poor,
+    medium gray for satisfactory, dark gray for good range), a blue (#306998) horizontal
+    bar representing the actual value, and a thin black vertical marker indicating
+    the target. Metric labels appear on the left side in bold dark text, and actual
+    values are displayed in blue text to the right of each bar. The title "bullet-basic
+    · bokeh · pyplots.ai" appears centered at the top. The x-axis shows "% of Target
+    Range" from 0-100, and all elements are well-proportioned with good spacing between
+    rows.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, labels at 28pt, axis labels at 22pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean spacing between bullets
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars, markers, and ranges all clearly visible with appropriate sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Grayscale ranges avoid colorblind issues, blue bar has good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though excessive whitespace at top and bottom margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"% of Target Range" is descriptive but lacks parenthetical units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle dashed grid (alpha 0.3), but no legend explaining grayscale
+          bands
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values, targets, and ranges correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: actual bar, target marker, qualitative
+          bands, labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within normalized 0-100% range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend for qualitative bands (poor/satisfactory/good)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "bullet-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variety: metrics above target (Revenue, Customers), below
+          target (Profit, Orders, Satisfaction), different scales'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales performance dashboard is a classic bullet chart use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic (revenue in hundreds, satisfaction 1-5 scale)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          without comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Bokeh features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses figure, rect glyphs, Label annotations, export_png/save - standard
+          usage but no ColumnDataSource or hover tools
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/highcharts.yaml b/plots/bullet-basic/metadata/highcharts.yaml
index bd1935fcce..8967c8f4c6 100644
--- a/plots/bullet-basic/metadata/highcharts.yaml
+++ b/plots/bullet-basic/metadata/highcharts.yaml
@@ -27,3 +27,176 @@ review:
   - Grid/legend scoring limited since bullet charts do not typically use these elements
   - Vertical spacing between metrics could be tighter for more compact dashboard view
   - Y-axis title slightly small relative to 4800x2700 canvas
+  image_description: 'The plot displays a horizontal bullet chart with 4 KPI metrics
+    stacked vertically: Revenue ($K), Profit (%), New Customers, and Satisfaction
+    (/5). Each metric shows a blue bar (Python blue #306998) representing the actual
+    value, with a thin vertical black target marker line. The background uses three
+    grayscale bands (light gray ~#e0e0e0, medium gray ~#b0b0b0, dark gray ~#808080)
+    representing poor/satisfactory/good qualitative ranges. Data labels in white text
+    display actual values inside the bars (275$K, 22%, 1650, 4.5/5). The title "bullet-basic
+    · highcharts · pyplots.ai" appears at the top with a subtitle "Q4 Performance
+    Dashboard - Actual vs Target". The x-axis shows percentage scale (0-100%) with
+    "% of Target Range" label. Category labels are positioned on the left side.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and data values are clearly readable; tick labels
+          slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars and target markers clearly visible with good sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Grayscale bands are colorblind-safe; blue bar provides good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; slight excess whitespace between metrics
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "% of Target Range" with units; category labels include
+          units ($K, %, /5)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid (appropriate for bullet chart); legend disabled (appropriate
+          since single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart type with horizontal orientation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values as bars, targets as markers, ranges as background bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: actual bar, target marker, qualitative
+          ranges, data labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Normalized 0-100% scale shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled for single-series bullet chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bullet-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows metrics above target (Revenue, New Customers) and below target
+          (Profit, Satisfaction); demonstrates variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Q4 Performance Dashboard with realistic business KPIs (revenue, profit
+          margin, customer acquisition, satisfaction score)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic; Revenue $275K/$250K target, 22% profit margin,
+          1650 customers, 4.5/5 satisfaction
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → chart options → render → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random data used (deterministic), but lacks explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts bullet module
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses Highcharts bullet module with targetOptions, plotBands, custom
+          tooltips, dataLabels; could leverage more advanced features like animation
+          or responsive breakpoints
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/letsplot.yaml b/plots/bullet-basic/metadata/letsplot.yaml
index 6d163efdc1..7bba857f2c 100644
--- a/plots/bullet-basic/metadata/letsplot.yaml
+++ b/plots/bullet-basic/metadata/letsplot.yaml
@@ -19,4 +19,169 @@ review:
   - Perfect grayscale color scheme following Stephen Few design guidelines
   - Multiple KPIs with varying performance levels demonstrate comparison capability
   - Clean normalization approach allows comparison across metrics with different scales
-  weaknesses: []
+  weaknesses:
+  - None significant - implementation meets all quality criteria
+  image_description: The plot displays a horizontal bullet chart with 4 KPI metrics
+    (Revenue, Profit, Customer Satisfaction, Market Share) arranged vertically. Each
+    metric shows three grayscale background bands (dark gray for Poor, medium gray
+    for Satisfactory, light gray for Good), a blue (#306998) horizontal bar representing
+    actual performance, and a black vertical marker indicating the target value. The
+    title "bullet-basic · letsplot · pyplots.ai" is displayed at the top. The x-axis
+    is labeled "Performance (%)" ranging from 0-110, and a legend at the bottom explains
+    the performance ranges. The design follows Stephen Few's bullet chart specification
+    with clean, minimal styling.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars and markers appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: grayscale ranges are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: good proportions and spacing
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: x-axis has label with units, y-axis uses metric names (appropriate
+          for chart type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: subtle grid, legend well-placed at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct bullet chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: actual as bar, target as marker, ranges as background
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: all spec features present (actual, target, ranges, labels)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: axes show all data (0-110%)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend correctly describes performance ranges
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'uses exact format: bullet-basic · letsplot · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows multiple metrics with varying performance levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: plausible business KPI scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: sensible values for business metrics
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: os/shutil needed for lets-plot file handling
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent ggplot2 grammar usage
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/matplotlib.yaml b/plots/bullet-basic/metadata/matplotlib.yaml
index 682de59e79..a182f2ec12 100644
--- a/plots/bullet-basic/metadata/matplotlib.yaml
+++ b/plots/bullet-basic/metadata/matplotlib.yaml
@@ -22,3 +22,179 @@ review:
   - Realistic business dashboard scenario with meaningful metrics
   weaknesses:
   - No x-axis label (though justified by different units per metric)
+  image_description: |-
+    The plot displays 4 horizontal bullet charts stacked vertically, each representing a different KPI metric. From top to bottom: Revenue (275$K actual vs 250 target), Profit (45% actual vs 50 target), New Customers (85 actual vs 100 target), and Satisfaction (4.2/5 actual vs 4.5 target). Each bullet chart features:
+    - A blue (#306998) horizontal bar showing the actual value
+    - A black vertical marker line indicating the target
+    - Three grayscale background bands (light gray for Poor, medium gray for Satisfactory, darker gray for Good)
+    - Value labels displayed to the right of each bar with appropriate units
+    The title "bullet-basic · matplotlib · pyplots.ai" is centered at the top. A legend in the lower right corner explains the color coding. The layout is clean with subtle dotted vertical grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, y-axis labels at 18pt, tick labels at 16pt, value
+          labels at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars, target markers, and bands are all clearly visible and appropriately
+          sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Grayscale bands are colorblind-safe, blue bar has good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though the different scales cause visual imbalance
+          between metrics (e.g., Satisfaction bar appears tiny compared to Revenue)
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No x-axis label (intentionally omitted due to different units per
+          metric, which is reasonable but technically loses points)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha=0.3 and dashed lines, legend well-placed in
+          lower right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart implementation with actual bar, target marker,
+          and qualitative bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values as bars, targets as vertical markers, ranges as background
+          bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: actual bar, target marker, qualitative
+          bands, labels, text values'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bullet-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple metrics with varying performance levels (Revenue exceeds
+          target, others below target), different units, different scales
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: KPI dashboard scenario is realistic and comprehensible (Revenue,
+          Profit, New Customers, Satisfaction)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are realistic: Revenue in $K, Profit as %, customer counts,
+          satisfaction on 5-point scale'
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          so this is acceptable - however marking 0 since there's no explicit seed
+          statement even though not strictly needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used (plt, Line2D, Patch)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's barh, custom legend with Patch and Line2D objects,
+          appropriate styling. Could have used more advanced features like FancyBboxPatch
+          or custom transforms, but implementation is solid.
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/plotly.yaml b/plots/bullet-basic/metadata/plotly.yaml
index 9f448b5d29..00e326e952 100644
--- a/plots/bullet-basic/metadata/plotly.yaml
+++ b/plots/bullet-basic/metadata/plotly.yaml
@@ -26,3 +26,176 @@ review:
   - Could use Plotly native go.Indicator with bullet mode for more idiomatic implementation
   - Profit chart shows 140 tick label extending into right margin area
   - Missing explicit x-axis title though subplot titles provide context
+  image_description: 'The plot displays four horizontal bullet charts arranged vertically,
+    each representing a different KPI metric. From top to bottom: Revenue ($K) showing
+    actual value of 275 exceeding target of 250, Profit ($K) at 85 below target of
+    100, Customers at 320 below target of 400, and Satisfaction at 4.2 below target
+    of 4.5. Each bullet chart features three grayscale background bands representing
+    qualitative ranges (poor/satisfactory/good), a blue (#306998) horizontal bar showing
+    the actual value, a thin black vertical line marking the target, and the actual
+    value displayed in blue text on the right side. The title "bullet-basic · plotly
+    · pyplots.ai" is centered at the top. The overall design is clean with a white
+    background and subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, subplot titles at 22pt, tick labels at 16pt, value
+          annotations at 20pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars, range bands, and target markers all clearly visible with good
+          sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses grayscale for ranges and single blue for actual values - colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though right margin annotation "140" visible on
+          Profit chart extends slightly beyond intended area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Subplot titles serve as labels but lack explicit axis titles with
+          units on x-axis
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), no legend needed for this chart type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart implementation with all components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values as bars, targets as markers, ranges as background bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: actual bar, target marker, qualitative
+          range bands, labels, value annotations'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within appropriate ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, metric labels are clear
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "bullet-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows multiple scenarios: exceeding target (Revenue), below target
+          (others), different scales and units'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: KPI dashboard scenario is realistic; metrics are plausible business
+          KPIs
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for business metrics (Revenue in $K, customer
+          counts, satisfaction scores 1-5)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), but no explicit seed statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (plotly.graph_objects, plotly.subplots)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses subplots, shapes for target markers, annotations, hover templates;
+          could leverage Plotly's native Indicator trace with bullet mode
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/plotnine.yaml b/plots/bullet-basic/metadata/plotnine.yaml
index e8b1a915de..d9bb8bad92 100644
--- a/plots/bullet-basic/metadata/plotnine.yaml
+++ b/plots/bullet-basic/metadata/plotnine.yaml
@@ -25,3 +25,168 @@ review:
   weaknesses:
   - Missing a legend or annotation explaining what the grayscale bands (Poor/Satisfactory/Good)
     represent
+  image_description: The plot displays four horizontal bullet charts for business
+    KPIs (Revenue, Profit, New Orders, Satisfaction). Each chart features three grayscale
+    background bands representing qualitative ranges (Poor=dark gray, Satisfactory=medium
+    gray, Good=light gray), a blue bar (#306998) showing the actual performance value,
+    and a black vertical line marking the target. The actual values (275.0, 22.0,
+    1050.0, 4.5) are displayed in blue text to the right of each bar. The x-axis shows
+    "Performance (%)" with tick marks at 0, 25, 50, 75, 100. The title "bullet-basic
+    · plotnine · pyplots.ai" appears centered at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text readable, good font sizing throughout
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars and target markers clearly visible and well-proportioned
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: grayscale bands are colorblind-safe, blue bar provides good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good proportions with minor extra whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Performance (%)" includes units, Y-axis labels are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: missing legend to explain what the grayscale bands represent
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct bullet chart type with all required components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: actual as bar, target as marker, ranges as background bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: all spec features present (actual bar, target marker, qualitative
+          ranges)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible within range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: no legend explaining grayscale band meanings
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows metrics above target (Revenue, Satisfaction) and below target
+          (Profit, New Orders)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: realistic business KPI dashboard scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: sensible values for each metric type
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent use of grammar of graphics with geom_rect, geom_segment,
+          geom_text, scale_fill_manual, and theme customization
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/pygal.yaml b/plots/bullet-basic/metadata/pygal.yaml
index 8c98074178..d0fda90729 100644
--- a/plots/bullet-basic/metadata/pygal.yaml
+++ b/plots/bullet-basic/metadata/pygal.yaml
@@ -26,3 +26,179 @@ review:
     could be cleaner
   - Target markers could use slightly different styling (the spec mentions thin contrasting
     line perpendicular to the bar - current implementation is adequate but thick)
+  image_description: 'The plot displays a horizontal bullet chart with 5 KPI metrics
+    (Satisfaction, Customers, New Orders, Profit, Revenue) arranged vertically. Each
+    metric shows three grayscale background bands representing qualitative ranges:
+    light gray for Poor (0-50%), medium gray for Satisfactory (50-75%), and darker
+    gray for Good (75-100%). Blue horizontal bars (Python blue #306998) represent
+    actual values, and thin black vertical markers indicate targets. The title "bullet-basic
+    · pygal · pyplots.ai" appears at the top. Y-axis labels include the metric name
+    and actual value with units (e.g., "Revenue (275$K)"). X-axis shows "Performance
+    (% of Maximum)" with scale 0-100. A legend at the bottom identifies Poor, Satisfactory,
+    Good ranges plus Actual and Target markers.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text clearly readable; title, labels, and tick marks are appropriately
+          sized for 4800×2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Actual bars and target markers clearly visible; bars could be slightly
+          more prominent against the background bands
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Grayscale ranges with blue actual bars; excellent colorblind-safe
+          design
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; slight excess whitespace between rows
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label "Performance (% of Maximum)"; Y-labels
+          include values with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend present and comprehensive; grid lines are subtle but legend
+          items slightly cramped
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart implementation with actual bars, target markers,
+          and qualitative ranges
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values shown as bars, targets as markers, ranges as background
+          bands
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has actual bar, target marker, and three qualitative bands; actual
+          value text shown in label (spec suggested optional text label)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100% scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all five elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bullet-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows metrics above target (Revenue, Customers), below target (Profit,
+          New Orders, Satisfaction), and various performance levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales KPIs scenario is realistic and comprehensible; appropriate
+          business metrics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic (Revenue $275K, Satisfaction 4.2/5, etc.)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear script but uses custom SVG injection which adds complexity
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data; no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (pygal, cairosvg, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves as plot.png but also saves plot.html (minor: expected only
+          png reference)'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of HorizontalStackedBar with custom SVG injection to
+          achieve bullet chart; leverages pygal's SVG-native output for modification
+  verdict: APPROVED
diff --git a/plots/bullet-basic/metadata/seaborn.yaml b/plots/bullet-basic/metadata/seaborn.yaml
index 172d5c9731..1ba1ac4d6a 100644
--- a/plots/bullet-basic/metadata/seaborn.yaml
+++ b/plots/bullet-basic/metadata/seaborn.yaml
@@ -24,3 +24,177 @@ review:
   - Seaborn is minimally utilized - only sns.barplot for the actual value bars, while
     most visualization work is done with matplotlib. Could better leverage seaborn
     DataFrame-centric approach or styling capabilities.
+  image_description: |-
+    The plot displays a horizontal bullet chart with four KPI metrics (Revenue, Customer Satisfaction, Efficiency, Quality Score) stacked vertically. Each metric shows:
+    - **Grayscale background bands** in three shades (light gray for Poor, medium gray for Satisfactory, dark gray for Good)
+    - **Blue horizontal bars** (#306998 Python Blue) representing actual values
+    - **Black vertical marker lines** indicating targets
+    - **Blue percentage labels** (78%, 85%, 62%, 91%) positioned to the right of each bar
+
+    The title "bullet-basic · seaborn · pyplots.ai" is displayed at the top in bold. X-axis shows "Performance (%)" from 0-100. A well-positioned legend in the upper right explains all elements. The layout is clean with subtle vertical grid lines and no overlapping elements.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, metric labels clear, value labels positioned
+          well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars, target markers, and background bands all clearly visible and
+          well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Grayscale bands with blue bars, colorblind-safe design
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent proportions, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Performance (%)" is descriptive with units, but Y-axis has no label
+          (set to empty string intentionally)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), legend well-placed but could use slightly
+          smaller font for balance
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bullet chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual values as bars, targets as markers, ranges as background bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: actual bar, target marker, qualitative
+          ranges, labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-115 shows all data clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Poor/Satisfactory/Good/Actual/Target
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bullet-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows metrics above target (Quality Score 91% vs 85%, Customer Satisfaction
+          85% vs 80%) and below target (Revenue 78% vs 90%, Efficiency 62% vs 75%).
+          Good variety but could show one metric in "poor" range
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business KPIs (Revenue, Customer Satisfaction, Efficiency, Quality
+          Score) are realistic metrics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentage values 0-100 with realistic performance numbers
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (even though data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.patches, pyplot, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: sns.barplot is used but the implementation is primarily matplotlib-based.
+          The grayscale bands, target markers, and styling all use matplotlib directly.
+          Seaborn's distinctive features (statistical aggregation, automatic styling,
+          hue-based coloring) are not leveraged.
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/altair.yaml b/plots/bump-basic/metadata/altair.yaml
index 488e65b142..a7ec72d98c 100644
--- a/plots/bump-basic/metadata/altair.yaml
+++ b/plots/bump-basic/metadata/altair.yaml
@@ -23,3 +23,177 @@ review:
   - Brown and light gray colors for Man United and Tottenham could be more distinguishable
     for accessibility
   - Missing explicit reproducibility comment (data is deterministic but not annotated)
+  image_description: 'The plot displays a bump chart showing Premier League team standings
+    over 6 match weeks. The title "bump-basic · altair · pyplots.ai" appears at the
+    top in large black text. The Y-axis shows "League Position" from 1 (top) to 6
+    (bottom), correctly inverted. The X-axis shows "Match Week" with Week 1-6 labels
+    rotated at approximately 45 degrees. Six teams are represented with distinct colors:
+    Arsenal (blue), Chelsea (yellow), Liverpool (red/coral), Man City (green), Man
+    United (brown), and Tottenham (light gray). Each team''s trajectory is shown with
+    connected lines (stroke width ~4) and filled circular markers at each week. The
+    legend is positioned on the right side. A subtle dashed grid helps track positions.
+    The layout is clean with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels are distinct and readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line strokeWidth=4 and point size=250 are well-suited for 6 entities
+          over 6 periods
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, though brown/gray could be more distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, legend well-positioned on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Match Week", "League Position") but no units
+          (N/A for rankings)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), legend is well-placed but could
+          have larger spacing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entity→color, Period→X-axis, Rank→Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Inverted Y-axis, distinct colors, dot markers at each period, lines
+          connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All ranks 1-6 visible, all weeks shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 6 teams
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "bump-basic · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes, stability (Tottenham), volatility
+          (Man United), rises (Arsenal), falls (Man United)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Premier League standings is a perfect real-world application for
+          bump charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 6 teams over 6 weeks with ranks 1-6 is realistic and appropriate
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Deterministic data (hardcoded), but no random seed comment to clarify
+          intent
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct scale_factor
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses declarative encoding, layered charts (lines + points), interactive
+          HTML export, tooltips - good usage but could leverage more Altair-specific
+          features like selections
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/bokeh.yaml b/plots/bump-basic/metadata/bokeh.yaml
index 86f4a68f33..ee97a3e18b 100644
--- a/plots/bump-basic/metadata/bokeh.yaml
+++ b/plots/bump-basic/metadata/bokeh.yaml
@@ -23,3 +23,177 @@ review:
   - Legend label font size (18pt) appears small in the rendered image relative to
     other text elements
   - Could benefit from HoverTool to show team name and rank on hover for better interactivity
+  image_description: 'The plot displays a bump chart showing sports league standings
+    over 6 weeks. Five teams (Team Alpha, Team Beta, Team Gamma, Team Delta, Team
+    Epsilon) are represented by colored lines (blue, orange, green, red, purple respectively)
+    using the Category10 palette. The Y-axis shows "Rank Position" from 1-5 with rank
+    1 at the top (inverted axis). The X-axis shows "Week" labels from Week 1 to Week
+    6. Each team''s trajectory is shown with connected lines and circular dot markers
+    at each week. A legend is positioned on the right side. The title follows the
+    required format: "bump-basic · bokeh · pyplots.ai". The background is a subtle
+    light gray (#fafafa) with dashed grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (size=20) and lines (width=4) are visible and well-sized,
+          though they could be slightly larger for the resolution
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Category10 palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, legend on right side works well, minor whitespace
+          at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Week", "Rank Position") but no units needed
+          for these categories
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha 0.3 is subtle, legend well-placed but label font could
+          be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Periods on X, Rank on Y, entities connected by lines
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis inverted, distinct colors, dot markers at each period, lines
+          connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All ranks (1-5) and all weeks (1-6) visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 5 teams correctly labeled in legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "bump-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes (Team Gamma rises from 5 to 1), stability
+          (Team Epsilon stays at 5), and various trajectories. Could show more dramatic
+          swaps.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sports league standings is a perfect real-world scenario for bump
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5 teams, 6 weeks is appropriate; ranks 1-5 makes sense
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded rankings)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly but also creates plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Bokeh features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Legend model, proper categorical x_range setup,
+          export_png + HTML output. Could leverage more interactive features like
+          HoverTool for a bump chart.
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/highcharts.yaml b/plots/bump-basic/metadata/highcharts.yaml
index 2cb5358177..dc480037f6 100644
--- a/plots/bump-basic/metadata/highcharts.yaml
+++ b/plots/bump-basic/metadata/highcharts.yaml
@@ -22,3 +22,172 @@ review:
   weaknesses:
   - X-axis title Match Week not visible in the rendered image despite being configured
   - LineSeries imported from highcharts_core.options.series.area instead of highcharts_core.options.series.line
+  image_description: 'The plot displays a bump chart showing league standings for
+    6 sports teams (Eagles, Wolves, Tigers, Bears, Sharks, Lions) over 6 match weeks.
+    The Y-axis is correctly inverted with Rank 1 at the top and Rank 6 at the bottom.
+    Each team is represented by a colored line with circular markers at each week:
+    Eagles (blue), Wolves (yellow), Tigers (purple), Bears (cyan), Sharks (pink),
+    Lions (brown). The title "bump-basic · highcharts · pyplots.ai" is displayed prominently
+    at the top with a subtitle "League Standings Over Season". The legend is positioned
+    vertically on the right side. Lines show various patterns - Eagles rise to 1st,
+    Wolves start strong then fade, Tigers peak mid-season, Bears drop early and stabilize,
+    while Sharks and Lions remain consistently at 5th and 6th place respectively.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title (72px), axis labels (48px), tick labels (36px) all clearly
+          readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines (6px width) and markers (14px radius) perfectly sized for 6
+          teams
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between all colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, but x-axis label "Match Week" is missing from the
+          visible plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Rank Position" is descriptive but no units needed; x-axis title
+          not visible'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid lines, legend well-placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Weeks on X-axis, ranks on Y-axis, correctly inverted
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Inverted Y-axis, distinct colors, dot markers, connected lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 1-6, X-axis shows all 6 weeks
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 6 team names correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bump-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overtakes, stability, rise, fall patterns as spec requires
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sports league standings is a natural bump chart application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 6 teams, 6 weeks, ranks 1-6 are sensible values
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Has functions via series loop, but follows simple sequential structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), fully reproducible
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: LineSeries imported from area module (unusual but works)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts options, reversed axis, line series, but no advanced
+          features like data labels or tooltips in static output
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/letsplot.yaml b/plots/bump-basic/metadata/letsplot.yaml
index a0a89dc91f..16b239eeb8 100644
--- a/plots/bump-basic/metadata/letsplot.yaml
+++ b/plots/bump-basic/metadata/letsplot.yaml
@@ -27,3 +27,177 @@ review:
   - Could leverage lets-plot interactive tooltip features in the HTML output
   - Data shows limited dramatic position changes - more crossovers would better demonstrate
     bump chart capabilities
+  image_description: 'The plot displays a bump chart showing rankings of 5 tech companies
+    (Alpha Corp, Beta Inc, Gamma Tech, Delta Systems, Epsilon Labs) over 6 quarters
+    (Q1-Q6). The Y-axis is correctly inverted with rank 1 at the top and rank 5 at
+    the bottom. Each company is represented by a distinct colored line: dark blue
+    (Alpha Corp), yellow (Beta Inc), green (Gamma Tech), purple (Delta Systems), and
+    pink (Epsilon Labs). Lines connect rankings across time periods with dot markers
+    at each data point. Company labels appear on the right side of the chart at their
+    final positions. The title "bump-basic · letsplot · pyplots.ai" appears at the
+    top. The layout is clean with a minimal theme and subtle grid.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and company labels are all clearly readable at
+          appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; company labels are well-spaced on the
+          right
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (2.5) and points (size 6) are clearly visible for
+          the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Good colorblind-safe palette with distinct hues (blue, yellow, green,
+          purple, pink)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of space, but right margin for labels could be more balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Quarter", "Rank") but carry no units (none
+          are needed in this context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with minimal theme, legend hidden in favor of direct
+          labels (good choice)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=period (quarters), Y=rank, correctly inverted axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis inverted, distinct colors, dot markers at each period, lines
+          connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible (ranks 1-5, Q1-Q6)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Direct labels on right side correctly identify each company
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bump-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes, stability (Epsilon flat), and volatility
+          (Gamma), but could show more dramatic position swaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company rankings is a plausible and comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5 entities and 6 periods is appropriate; rankings are sensible but
+          simple
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar correctly with geom_line, geom_point, geom_text,
+          scale_y_reverse, and theme customization. Uses ggsize and scale=3 for proper
+          export. Could leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/matplotlib.yaml b/plots/bump-basic/metadata/matplotlib.yaml
index 4cc5b5248c..ff7d7aa1bc 100644
--- a/plots/bump-basic/metadata/matplotlib.yaml
+++ b/plots/bump-basic/metadata/matplotlib.yaml
@@ -24,3 +24,172 @@ review:
     dramatic position swaps to better demonstrate bump chart capabilities
   - Axis labels lack units/context (though Period and Rank are descriptive for this
     use case)
+  image_description: 'The plot displays a bump chart showing sports league standings
+    for 5 teams (Alpha, Beta, Gamma, Delta, Epsilon) over 6 weeks. The Y-axis is correctly
+    inverted with rank 1 at the top. Each team is represented by a distinct colored
+    line with circular markers: Team Alpha in Python blue (#306998), Team Beta in
+    yellow/gold (#FFD43B), Team Gamma in green (#2ecc71), Team Delta in red (#e74c3c),
+    and Team Epsilon in purple (#9b59b6). Lines connect each team''s position across
+    weeks, clearly showing rank changes - for example, Team Alpha rises from 3rd to
+    1st place, while Team Beta drops from 1st to 2nd. The chart has a subtle dashed
+    grid, clear axis labels ("Period" and "Rank"), and a well-positioned legend outside
+    the plot area on the right. The title correctly uses the format "bump-basic ·
+    matplotlib · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (s=15) and lines (lw=3) well-sized, slightly smaller markers
+          would be ideal for 5 entities
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between all teams
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, legend well-positioned outside plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (though units aren't applicable for
+          rank/period)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3 is good, legend well-placed outside plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Periods on X-axis, ranks on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis inverted, distinct colors, dot markers, lines connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 ranks and 6 periods visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 teams
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bump-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows rank changes and overtakes well, but top 3 teams have more
+          dynamic movement than bottom 2
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sports league standings is a perfect, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 5 teams, 6 weeks, ranks 1-5 - all realistic values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (no random values), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of matplotlib axes methods, invert_yaxis, bbox_to_anchor
+          for legend. Could use annotations or text labels for additional features.
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/plotly.yaml b/plots/bump-basic/metadata/plotly.yaml
index ef970ea367..45b803f9b1 100644
--- a/plots/bump-basic/metadata/plotly.yaml
+++ b/plots/bump-basic/metadata/plotly.yaml
@@ -24,3 +24,178 @@ review:
   - Data is deterministic so seed is not technically needed, but consider adding np.random.seed
     for consistency if random data is ever used
   - Layout balance could be improved with legend placement closer to plot
+  image_description: 'The plot displays a bump chart showing sports league standings
+    over 6 weeks. The Y-axis shows ranks from 1 (top) to 5 (bottom) with rank 1 correctly
+    positioned at the top (inverted axis). The X-axis shows "Period" with Week 1 through
+    Week 6. Five teams are tracked with distinct colored lines and circular markers:
+    Team Alpha (blue #306998), Team Beta (yellow #FFD43B), Team Gamma (green #2ecc71),
+    Team Delta (red #e74c3c), and Team Epsilon (purple #9b59b6). The lines effectively
+    show ranking changes - Team Alpha rises from 3rd to 1st, Team Beta falls from
+    1st to 2nd, Team Gamma has dynamic movement including briefly taking 1st in Week
+    5. The title reads "bump-basic · plotly · pyplots.ai" at top left. The legend
+    is positioned to the right of the plot area. The background uses plotly_white
+    template with subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels all clearly readable with appropriate
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines (width=4) and markers (size=16) are well-sized for the data
+          density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors that are colorblind-friendly (blue, yellow,
+          green, red, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall balance, though right margin for legend takes notable
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Period" and "Rank" are descriptive but lack units (though units
+          aren''t really applicable here)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well-placed but slightly distant from plot
+          area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rank changes over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=periods, Y=ranks correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Inverted Y-axis (rank 1 at top), distinct colors, dot markers, lines
+          connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All ranks (1-5) and periods (Week 1-6) visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all five teams
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bump-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes, stability (Team Delta stays 4th mostly);
+          could show more dramatic swaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sports league standings is a perfect, real-world bump chart use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5 teams over 6 weeks is appropriate; values are sensible
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (data is deterministic, so a seed is not strictly
+          needed; add one if random data is ever introduced)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported, which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 2
+        max: 2
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with mode="lines+markers", proper plotly_white template,
+          interactive HTML export, and autorange="reversed" for Y-axis inversion
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/plotnine.yaml b/plots/bump-basic/metadata/plotnine.yaml
index ec37892bd5..7abce86348 100644
--- a/plots/bump-basic/metadata/plotnine.yaml
+++ b/plots/bump-basic/metadata/plotnine.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Grid lines could be more subtle (lower alpha) for a cleaner look
   - Axis labels are functional but could be more descriptive
+  image_description: 'The plot displays a bump chart showing tech company rankings
+    over 6 quarters (Q1-Q6). Five companies are tracked: Alpha Corp (teal), Gamma
+    Tech (green), Beta Inc (coral/salmon), Delta Systems (blue/periwinkle), and Epsilon
+    Labs (pink). The Y-axis shows ranks 1-5 with rank 1 at the top (inverted). Lines
+    connect each company''s rank across quarters with dot markers at each data point.
+    Company names appear as labels on the right side at their final Q6 positions.
+    The title "bump-basic · plotnine · pyplots.ai" is displayed at the top. The background
+    is minimal with a subtle grid.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; end labels well-spaced at different
+          ranks
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines (size=2.5) and points (size=6) are well-sized; slight deduction
+          as some line crossings are dense
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Set2 palette is colorblind-safe with good distinction between categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; right side has adequate space for labels but could
+          be slightly tighter
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Quarter" and "Rank" are descriptive but lack context (e.g., could
+          be "Quarterly Period")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle, legend disabled in favor of direct labels (good choice),
+          but grid could be even more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing ranking changes over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=period, Y=rank, color=entity - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis inverted, distinct colors, dot markers, lines connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axis limits appropriate (0.5-7.5 allows space for
+          labels)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Direct labels accurate and match entities
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "bump-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes, stability (Epsilon Labs), volatility
+          (Gamma Tech); good variety but all 5 ranks used every period (no ties shown)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company market rankings is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5 entities, 6 periods is appropriate; ranks 1-5 sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar properly with geom_line, geom_point, geom_text,
+          scale_y_reverse, theme customization; solid plotnine usage but no advanced
+          features like faceting
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/pygal.yaml b/plots/bump-basic/metadata/pygal.yaml
index 250540fc2a..0e33735e6e 100644
--- a/plots/bump-basic/metadata/pygal.yaml
+++ b/plots/bump-basic/metadata/pygal.yaml
@@ -25,3 +25,175 @@ review:
     using legend_at_bottom=True or adjusting legend_box_size
   - Does not utilize pygal-specific interactive features (tooltips, value formatters,
     hover effects) that would enhance the visualization
+  image_description: 'The plot displays a bump chart showing sports league standings
+    over 6 weeks. The title "bump-basic · pygal · pyplots.ai" is centered at the top.
+    The Y-axis labeled "Rank" shows positions 1-5 from top to bottom (correctly inverted).
+    The X-axis labeled "Period" shows Week 1 through Week 6. Five teams are represented
+    with distinct colored lines: Team Alpha (blue), Team Beta (yellow/gold), Team
+    Gamma (green), Team Delta (red), and Team Epsilon (purple). Each team''s position
+    is marked with dot markers connected by lines. The chart shows clear ranking changes
+    over time - Team Beta starts at rank 1 but drops, Team Alpha rises to first place
+    by Week 6, and Team Gamma has significant volatility. The legend is positioned
+    in the upper left. Grid lines are visible but subtle.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title and labels are clear, though font sizes
+          could be slightly larger for optimal viewing
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; legend, axis labels, and data points are all
+          distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dot markers and lines are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors (blue, yellow, green, red, purple) with good
+          contrast; distinguishable for most color vision types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of space, proportions are well-balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Rank" and "Period" but no units (though units
+          not applicable here)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend placement in upper-left is
+          acceptable but overlays plot area slightly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart showing rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entities mapped correctly, periods on X-axis, ranks on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis inverted (rank 1 at top), distinct colors, dot markers, lines
+          connecting entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All ranks 1-5 shown, all periods displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all five teams
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows correct format: "bump-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes, and stability well; could include
+          more dramatic position swaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sports league standings is a perfect real-world bump chart use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 5 teams, 6 weeks, ranks 1-5 - all sensible values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple flat structure: imports → data → config → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data (no random generation needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic pygal.Line() without leveraging pygal-specific features
+          like tooltips, value_formatter, or custom interpolation styles
+  verdict: APPROVED
diff --git a/plots/bump-basic/metadata/seaborn.yaml b/plots/bump-basic/metadata/seaborn.yaml
index a788bf1d9f..fdadbef4f7 100644
--- a/plots/bump-basic/metadata/seaborn.yaml
+++ b/plots/bump-basic/metadata/seaborn.yaml
@@ -20,3 +20,175 @@ review:
   - Proper use of seaborn lineplot with hue for categorical grouping
   weaknesses:
   - Axis labels could be more descriptive (e.g., "Competition Week" and "League Position")
+  image_description: 'The bump chart displays 5 sports teams (Lions, Tigers, Bears,
+    Eagles, Wolves) with their ranking positions tracked over 6 weeks (Week 1-6).
+    The Y-axis shows ranks 1-5 with rank 1 at the top (inverted axis as required).
+    Each team is represented by a distinct colored line with circular markers: Lions
+    (blue #306998), Tigers (yellow), Bears (red), Eagles (green), and Wolves (purple).
+    Lines connect the rankings across weeks showing position changes. The title follows
+    the correct format "bump-basic · seaborn · pyplots.ai". A legend on the right
+    side clearly identifies each team. The grid is subtle with dashed lines at alpha
+    0.3.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 18 and linewidth 4 are excellent for 5 entities over
+          6 periods
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors that are colorblind-safe (blue, yellow, red,
+          green, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, legend placed outside plot area using bbox_to_anchor
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Labels are "Week" and "Rank" without units or context; could be more
+          descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3 with dashed lines; legend is well-placed
+          outside
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bump chart with lines connecting rankings over time
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Period (Week) on X-axis, Rank on Y-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis inverted, distinct colors, dot markers present, lines connect
+          entities
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 ranks and 6 weeks visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 teams
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "bump-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows rank changes, overtakes (Tigers→Lions at Week 3), stability,
+          rises and falls
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sports league standings over a season is a realistic and comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 5 teams over 6 weeks with ranks 1-5 is perfect for bump chart demonstration
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (though data is deterministic, best practice is
+          to include seed when using any randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (matplotlib.pyplot, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses seaborn's lineplot with hue grouping and palette, which is good
+          but relatively basic seaborn usage
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/altair.yaml b/plots/calibration-curve/metadata/altair.yaml
index 61d1c94ca2..cdc6b8b803 100644
--- a/plots/calibration-curve/metadata/altair.yaml
+++ b/plots/calibration-curve/metadata/altair.yaml
@@ -25,3 +25,178 @@ review:
   - Missing subtle grid lines which would aid in reading exact values from the calibration
     curve
   - Histogram bars could use slight spacing/gap for better visual separation
+  image_description: 'The plot shows a calibration curve visualization with two vertically
+    stacked charts. The main chart (top) displays the calibration curve with a solid
+    blue line (#306998) connecting filled circular markers representing binned mean
+    predicted probabilities on the X-axis vs. fraction of positives on the Y-axis.
+    A yellow dashed diagonal line represents perfect calibration. The title reads
+    "calibration-curve · altair · pyplots.ai" with a subtitle showing "Brier Score:
+    0.1388". The bottom chart is a histogram showing the distribution of predicted
+    probabilities using blue bars. Both axes are clearly labeled with appropriate
+    font sizes. The calibration curve demonstrates a slightly overconfident classifier
+    that falls below the diagonal for lower probabilities and approaches/crosses it
+    at higher values.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 18pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=300), line thickness appropriate; minor
+          deduction for points being slightly large
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and high
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas with vconcat layout, charts well proportioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Mean Predicted Probability", "Fraction of Positives",
+          "Count"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid lines present; while not always necessary, subtle grid would
+          aid reading values
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve with diagonal reference line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=mean predicted probability, Y=fraction of positives (correct)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diagonal reference line, 10 bins, Brier score metric, histogram
+          of predictions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show 0-1 range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed as single model; colors are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "calibration-curve · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overconfident classifier behavior well, demonstrates deviation
+          from perfect calibration; could show underconfident region more clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated classifier predictions are plausible, though generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 2000 samples, probabilities in 0-1 range, Brier score of 0.1388 is
+          realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html; spec doesn't require HTML but
+          this is acceptable for interactive libraries
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative grammar with layering, vconcat, and configure
+          methods; could leverage tooltips more prominently or interactive features
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/bokeh.yaml b/plots/calibration-curve/metadata/bokeh.yaml
index 17734befd6..56e80769b7 100644
--- a/plots/calibration-curve/metadata/bokeh.yaml
+++ b/plots/calibration-curve/metadata/bokeh.yaml
@@ -24,3 +24,175 @@ review:
   - Could leverage Bokeh interactive features like HoverTool to show bin counts on
     hover
   - Axis labels lack explicit context
+  image_description: 'The plot displays a calibration curve on a light gray background
+    (#fafafa). The title "calibration-curve · bokeh · pyplots.ai" appears at the top
+    left in bold. The X-axis is labeled "Mean Predicted Probability" (0 to 1) and
+    the Y-axis is labeled "Fraction of Positives" (0 to 1). A gray dashed diagonal
+    line represents perfect calibration. The classifier''s calibration curve is shown
+    as a blue line (#306998) with circular markers at each of the 10 bin points. The
+    curve shows slight overconfidence (below the diagonal at higher probabilities).
+    Metrics are displayed in the upper left corner with a white background box showing
+    "Brier Score: 0.144" and "ECE: 0.045". The legend is positioned in the bottom
+    right corner showing "Perfect Calibration" (dashed) and "Classifier" (solid blue).
+    Grid lines are dashed and subtle.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at 25px with good alpha, line width 5px - perfectly
+          visible for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) vs gray dashed line - colorblind safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but legend is somewhat small/distant in bottom right
+          corner
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (probabilities are unitless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend could be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve (reliability diagram)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=mean predicted probability, Y=fraction of positives - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line ✓, 10 bins ✓, Brier score ✓, ECE ✓
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show 0-1 range with small padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies perfect calibration vs classifier
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overconfident classifier behavior, but could demonstrate more
+          varied calibration patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulates binary classification with realistic miscalibration (overconfident
+          model)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5000 samples, probabilities 0-1, Brier score ~0.14 and ECE ~0.045
+          are realistic values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct for bokeh)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Label annotation, proper Bokeh styling. Could
+          leverage HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/highcharts.yaml b/plots/calibration-curve/metadata/highcharts.yaml
index 74e26d0f6c..ef8a1744b8 100644
--- a/plots/calibration-curve/metadata/highcharts.yaml
+++ b/plots/calibration-curve/metadata/highcharts.yaml
@@ -26,3 +26,185 @@ review:
     of plot
   - Interactive HTML version does not leverage Highcharts tooltips to show bin counts
     or additional information on hover
+  image_description: 'The plot displays a calibration curve on a white background.
+    The title "calibration-curve · highcharts · pyplots.ai" appears at the top in
+    bold black text, with a subtitle showing "Model Calibration Analysis | Brier Score:
+    0.1090 | ECE: 0.1566". The X-axis is labeled "Mean Predicted Probability" (0 to
+    1) and the Y-axis is labeled "Fraction of Positives" (0 to 1). A gray dashed diagonal
+    line represents perfect calibration. The classifier calibration curve is shown
+    as a solid blue line (Python Blue #306998) with circular markers at each of the
+    10 bin centers. The curve shows an S-shaped pattern typical of a slightly overconfident
+    classifier - below the diagonal in the middle ranges. A legend in the bottom-right
+    corner shows three entries: "Perfect Calibration" (dashed line), "Classifier (Brier:
+    0.109)" (solid blue line), and "Calibration Points" (blue circles). The grid is
+    subtle with light gray lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at the large canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized and clearly visible; line width is appropriate;
+          slight deduction as markers could be slightly larger for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and gray color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though the legend is positioned quite far
+          to the right with some empty space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Mean Predicted Probability", "Fraction of
+          Positives") but lack units (though probabilities are unitless, could indicate
+          "probability" or "(0-1)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle; legend is functional but positioned
+          in a way that creates some visual imbalance
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve / reliability diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows mean predicted probability, Y-axis shows fraction of
+          positives
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal reference line, 10 bins, Brier score AND ECE metrics
+          displayed
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-1 range for both dimensions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "calibration-curve · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overconfident classifier behavior with S-curve pattern; demonstrates
+          both under-prediction (low end) and over-prediction (middle range) relative
+          to perfect calibration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated binary classifier with beta distributions creating realistic
+          overconfident behavior typical of ML models
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 2000 samples with balanced classes is appropriate; Brier score of
+          0.109 and ECE of 0.157 are realistic values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → binning calculation → chart
+          creation → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses SplineSeries for smooth curves, ScatterSeries for points, proper
+          Highcharts options configuration, and Selenium-based PNG export. Does not
+          leverage advanced Highcharts features like tooltips or animation that would
+          enhance the interactive HTML version.
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/letsplot.yaml b/plots/calibration-curve/metadata/letsplot.yaml
index 7b1f26cc03..b84f2b652f 100644
--- a/plots/calibration-curve/metadata/letsplot.yaml
+++ b/plots/calibration-curve/metadata/letsplot.yaml
@@ -27,3 +27,172 @@ review:
     pattern less gradual than typical real-world examples
   - Histogram bars extend quite high (normalized to max) which visually competes with
     the calibration curve; could use smaller scale factor
+  image_description: The plot shows a calibration curve with a blue line and points
+    representing the model's calibration across 10 probability bins. The x-axis shows
+    "Mean Predicted Probability" (0 to 1) and the y-axis shows "Fraction of Positives"
+    (0 to 1). A gray dashed diagonal line represents perfect calibration. Yellow semi-transparent
+    bars at the bottom display the histogram of predicted probabilities, showing the
+    distribution of predictions. The title displays "calibration-curve · letsplot
+    · pyplots.ai" with Brier Score (0.0516) and ECE (0.1885) metrics below. The calibration
+    curve shows the model over-predicts at lower probabilities (curve below diagonal)
+    and under-predicts at higher probabilities (curve above diagonal, reaching 1.0
+    while perfect calibration would be lower).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, though title could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points and lines are visible, histogram bars are appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray/yellow palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot well-proportioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (probabilities are unitless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, no legend needed for single-model
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve with reliability diagram format
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean predicted probability on X, fraction of positives on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line, 10 bins, Brier score AND ECE displayed,
+          histogram of predictions included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show full 0-1 range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single model, elements are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "calibration-curve · letsplot · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows calibration deviation from perfect line, but the curve is somewhat
+          extreme (goes from 0 to 1 very sharply)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification scenario with realistic 60/40 class imbalance
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are in valid 0-1 range, ECE of 0.1885 is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with geom_line, geom_point, geom_bar,
+          theme_minimal, but doesn't leverage more advanced lets-plot features
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/matplotlib.yaml b/plots/calibration-curve/metadata/matplotlib.yaml
index e139f88dd8..42b4809f02 100644
--- a/plots/calibration-curve/metadata/matplotlib.yaml
+++ b/plots/calibration-curve/metadata/matplotlib.yaml
@@ -24,3 +24,182 @@ review:
   - Axis labels lack units or additional context
   - Could use more distinctive matplotlib features like fill_between for confidence
     bands
+  image_description: The plot consists of two vertically stacked subplots. The main
+    upper subplot shows three calibration curves against a dashed black diagonal reference
+    line representing perfect calibration. The "Well-Calibrated" model (blue line
+    with circle markers) follows closely along the diagonal. The "Overconfident" model
+    (yellow line with square markers) shows a steep S-curve pattern, jumping sharply
+    from 0 to 1 around the 0.4-0.6 probability range. The "Underconfident" model (pink/magenta
+    line with triangle markers) shows a flatter curve. Each model displays its Brier
+    score in the legend (0.101, 0.020, 0.181 respectively). The lower subplot shows
+    a histogram of predicted probability distributions for all three models, clearly
+    showing that the overconfident model clusters predictions near 0 and 1, while
+    the underconfident model clusters near 0.5, and the well-calibrated model has
+    a more spread distribution. All text is clearly readable, colors are distinct
+    and colorblind-friendly, and the layout is well-balanced.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, legend at
+          16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size 12 with linewidth 3 are clearly visible; could be
+          slightly larger but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and pink/magenta are distinguishable for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Two-subplot layout with 3:1 height ratio uses canvas effectively
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Mean Predicted Probability", "Fraction of
+          Positives", "Count") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3 with dashed style is subtle, legends well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration/reliability diagram with diagonal reference
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows mean predicted probability, Y-axis shows fraction of
+          positives
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diagonal reference line, 10 bins, Brier scores displayed, histogram
+          subplot for prediction distribution, multiple model comparison with distinct
+          colors and legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes range from 0 to 1 as appropriate for probabilities
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legends correctly identify each model with Brier scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "calibration-curve · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows well-calibrated, overconfident, and underconfident models demonstrating
+          key calibration patterns; histogram clearly shows distribution differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated classifier outputs are plausible; using 35% positive rate
+          is realistic for imbalanced classification
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 2000 samples, probabilities correctly bounded 0-1, Brier scores in
+          realistic range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only imports matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib correctly with subplots and gridspec_kw for height
+          ratios, but doesn't leverage more distinctive matplotlib features like fill_between
+          for confidence intervals or custom tick formatting
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/plotly.yaml b/plots/calibration-curve/metadata/plotly.yaml
index 1bc1f26d9e..9a5e99a7c4 100644
--- a/plots/calibration-curve/metadata/plotly.yaml
+++ b/plots/calibration-curve/metadata/plotly.yaml
@@ -24,3 +24,180 @@ review:
   - Histogram subplot could benefit from more visible distinction between overlapping
     distributions (consider side-by-side instead of overlay)
   - Does not leverage plotly interactive hover features to show bin details on mouseover
+  image_description: 'The plot displays a calibration curve visualization with two
+    panels. The top panel (main calibration curve) shows three elements: a gray dashed
+    diagonal line representing perfect calibration, a blue line with circular markers
+    for a "Calibrated Model" (Brier: 0.206), and a yellow/gold line with diamond markers
+    for an "Overconfident Model" (Brier: 0.219). The calibrated model follows the
+    diagonal more closely, while the overconfident model shows the characteristic
+    S-curve pattern (above the diagonal for low probabilities, below for high probabilities).
+    The bottom panel shows overlaid histograms of the prediction distributions for
+    both models - the calibrated model (blue) has a more uniform distribution across
+    probabilities, while the overconfident model (yellow) shows concentration at the
+    extremes (near 0 and 1). The title "calibration-curve · plotly · pyplots.ai" is
+    centered at the top. All text is clearly readable with appropriate font sizes.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 14 with width 4 lines are well-sized for the data density
+          (10 points per model)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of subplots with 70/30 split, slight reduction for histogram
+          being somewhat compressed
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Mean Predicted Probability", "Fraction of Positives",
+          "Count"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but legend lacks border visibility in
+          some contexts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve (reliability diagram) implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=mean predicted probability, Y=fraction of positives correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diagonal reference line, 10 bins, Brier scores displayed, histogram
+          subplot
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes properly show 0-1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all elements with Brier scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "calibration-curve · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows well-calibrated vs overconfident models, demonstrating calibration
+          concepts effectively; minor deduction as both models have similar Brier
+          scores
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated classifier predictions are plausible, though could be more
+          domain-specific
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values in valid 0-1 probability range, 2000 samples appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly imports, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses subplots, interactive features via HTML export, but doesn't
+          leverage plotly-specific features like hover customization or animations
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/plotnine.yaml b/plots/calibration-curve/metadata/plotnine.yaml
index 79654bad5d..5a84c80b72 100644
--- a/plots/calibration-curve/metadata/plotnine.yaml
+++ b/plots/calibration-curve/metadata/plotnine.yaml
@@ -23,3 +23,183 @@ review:
   weaknesses:
   - Missing optional histogram subplot showing prediction distribution
   - Axis labels could be more explicit with units or clarification
+  image_description: The plot displays a calibration curve (reliability diagram) with
+    a square 1:1 aspect ratio. The X-axis shows "Mean Predicted Probability" (0.00
+    to 1.00) and the Y-axis shows "Fraction of Positives" (0.00 to 1.00). A gray dashed
+    diagonal line represents perfect calibration. The actual calibration curve is
+    shown as a blue line connecting 10 data points (yellow/gold circles with blue
+    outlines). The curve generally tracks near the diagonal but shows the model is
+    slightly overconfident (curve dips below the diagonal in the middle range). The
+    title displays "calibration-curve · plotnine · pyplots.ai (ECE = 0.069)". The
+    background is white with subtle gray grid lines. Point sizes vary based on bin
+    counts.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Points are well-sized with stroke and fill distinction; line is
+          clearly visible. Minor: some points could be slightly larger for better
+          emphasis'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line and yellow/gold points with good contrast; colorblind-safe
+          palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format is appropriate for calibration curves; plot fills canvas
+          well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Mean Predicted Probability", "Fraction of
+          Positives") but no units (probability is unitless, so acceptable but not
+          ideal)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for single-model
+          plot
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve/reliability diagram type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Mean predicted probability on X, fraction of positives on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has diagonal reference line, 10 bins, ECE metric displayed. Missing:
+          optional histogram subplot for prediction distribution'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-1 range as appropriate for probability
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present, but single model doesn't require one; could have
+          a legend identifying the calibration curve
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{spec-id} · {library} · pyplots.ai" format with ECE metric
+          included
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overconfident model behavior (curve below diagonal in mid-range),
+          varying bin densities via point sizes. Could demonstrate more extreme miscalibration
+          patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulates a realistic slightly overconfident classifier; beta distribution
+          for probabilities is plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values in proper 0-1 probability range; ECE of 0.069 is realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' correctly ✓ (awarding 1 point)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good use of plotnine''s grammar of graphics: ggplot + aes + multiple
+          geoms layered (geom_abline, geom_line, geom_point), theme_minimal, scale_size_identity,
+          coord_fixed for square aspect ratio'
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/pygal.yaml b/plots/calibration-curve/metadata/pygal.yaml
index 9aa0d87059..0847b289be 100644
--- a/plots/calibration-curve/metadata/pygal.yaml
+++ b/plots/calibration-curve/metadata/pygal.yaml
@@ -22,3 +22,185 @@ review:
   - Clean KISS-style code structure with inline calibration computation
   weaknesses:
   - None significant - previous issues have been addressed
+  image_description: 'The plot displays a calibration curve on a white background
+    with a 4800×2700 canvas. Three series are shown: (1) a gray dashed diagonal line
+    representing "Perfect Calibration" running from (0,0) to (1,1), (2) a dark green
+    line with circular markers for "Logistic Regression (Brier: 0.209)" that closely
+    tracks the diagonal indicating good calibration, and (3) a dark red line with
+    circular markers for "Overconfident Model (Brier: 0.227)" showing the characteristic
+    sigmoid pattern where the model is overconfident at both extremes. The x-axis
+    is labeled "Mean Predicted Probability" ranging from 0.0 to 1.0, and the y-axis
+    is labeled "Fraction of Positives" from 0 to 1. The title follows the correct
+    format "calibration-curve · pygal · pyplots.ai". The legend is placed at the bottom
+    in a horizontal layout with all three series clearly labeled. Grid lines are subtle
+    and non-distracting.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend are all clearly readable at full size.
+          Font sizes appropriately scaled for canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Legend at bottom is well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots are visible with size 14, lines have good stroke width. Markers
+          could be slightly larger for optimal visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green/red/gray color scheme is high-contrast. Colors are distinguishable
+          even for colorblind users due to different hues and saturation levels.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with plot filling most of the space. Bottom
+          margin for legend is appropriately sized.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Mean Predicted Probability" and "Fraction of
+          Positives".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (light gray, dashed). Legend placement at bottom works
+          but takes up considerable space.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve (XY scatter/line plot)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = mean predicted probability, Y = fraction of positives (correct)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Includes diagonal reference line ✓, 10 bins ✓, Brier scores displayed
+          ✓. Missing: histogram subplot for prediction distribution (spec says "optional"
+          so minor deduction)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show full 0-1 range as appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three series with Brier scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "calibration-curve · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows well-calibrated vs overconfident models, demonstrating the
+          key aspects of calibration curves. Shows clear distinction between model
+          behaviors.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Logistic Regression as well-calibrated and "Overconfident Model"
+          (Random Forest/NN style) is plausible. Context is machine learning model
+          evaluation.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Probabilities correctly bounded 0-1, Brier scores in realistic range
+          (0.2-0.23)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported; all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses `strict=False` in zip which is a Python 3.10+ feature but not
+          deprecated
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct outputs)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart, custom Style configuration, interactive tooltips
+          with dict format for hover information, legend_at_bottom, and both PNG and
+          HTML export (HTML for interactivity). Excellent use of pygal-specific features.
+  verdict: APPROVED
diff --git a/plots/calibration-curve/metadata/seaborn.yaml b/plots/calibration-curve/metadata/seaborn.yaml
index 1070258bba..cc81548162 100644
--- a/plots/calibration-curve/metadata/seaborn.yaml
+++ b/plots/calibration-curve/metadata/seaborn.yaml
@@ -28,3 +28,174 @@ review:
     overconfident model
   - Could use seaborn built-in statistical features more extensively such as confidence
     bands
+  image_description: 'The plot displays a two-panel figure. The left panel shows calibration
+    curves for three classifiers: Well-Calibrated (blue circles, Brier: 0.128), Overconfident
+    (yellow/gold squares, Brier: 0.124), and Underconfident (brown triangles, Brier:
+    0.191), plotted against a dashed diagonal reference line representing perfect
+    calibration. The x-axis is "Mean Predicted Probability" (0-1) and y-axis is "Fraction
+    of Positives" (0-1). The title correctly reads "calibration-curve · seaborn ·
+    pyplots.ai". The right panel shows overlapping histograms of prediction distributions
+    for all three classifiers, with "Predicted Probability" on x-axis and "Count"
+    on y-axis. Colors are Python Blue (#306998), Python Yellow (#FFD43B), and a brown/sienna
+    color (#8B4513). The whitegrid style from seaborn is applied throughout.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers well-sized (12-14), lines visible with linewidth=3
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, brown are distinguishable; brown could be slightly
+          more distinct from yellow
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good 2:1 ratio between panels, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Mean Predicted Probability", "Fraction of Positives",
+          "Count"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is 0.3 which is good, but legends could be better placed;
+          histogram legend overlaps slightly with data bars
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calibration curve (reliability diagram)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=mean predicted probability, Y=fraction of positives
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line ✓, 10 bins ✓, Brier scores ✓, histogram subplot
+          ✓, multiple models with distinct colors and legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify each classifier with Brier scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "calibration-curve · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows well-calibrated, overconfident, and underconfident classifiers
+          demonstrating different calibration patterns; could show more extreme miscalibration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification scenario with 35% positive class is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Brier scores are realistic (0.12-0.19); 2000 samples is appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, linear flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' ✓ (actually correct, my mistake in reading)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.set_theme, sns.lineplot, sns.histplot appropriately, but
+          could leverage more seaborn-specific statistical features like confidence
+          intervals
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/altair.yaml b/plots/candlestick-basic/metadata/altair.yaml
index 3b0b2dd4e7..4629fb0884 100644
--- a/plots/candlestick-basic/metadata/altair.yaml
+++ b/plots/candlestick-basic/metadata/altair.yaml
@@ -25,3 +25,179 @@ review:
     being configured in code
   - Color scheme uses red-green which is not ideal for colorblind accessibility
   - Missing interactive() method which is Altair signature feature for web-based exploration
+  image_description: The plot displays a candlestick chart showing 30 business days
+    of stock price data from January 1 to February 8, 2024. Green (teal) candlesticks
+    represent bullish days where the closing price was higher than the opening price,
+    while red candlesticks represent bearish days. Each candlestick has a rectangular
+    body showing the open-close range and thin wicks (vertical lines) extending above
+    and below showing the high-low range. The chart shows a general upward trend peaking
+    around Jan 11 (~$110), followed by volatility and a decline toward the end (~$86-90).
+    The title "candlestick-basic · altair · pyplots.ai" appears at the top center.
+    The Y-axis is labeled "Price ($)" ranging from $84-$112, and the X-axis is labeled
+    "Date" with dates formatted as "Jan 01", "Jan 03", etc. A subtle grid is visible
+    in the background. No legend is displayed in the visible area.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (~28pt), axis labels are readable (~22pt),
+          tick labels are well-sized (~18pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, date labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candlestick bodies are appropriately sized (size=24), wicks clearly
+          visible (strokeWidth=3), good balance for 30 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Green (#26A69A) and red (#EF5350) provide good contrast but red-green
+          is not ideal for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)" includes unit, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (gridOpacity=0.3) which is good, but legend is not
+          visible in the rendered image despite being configured
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart with body and wicks
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, OHLC prices correctly mapped to candlestick elements
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Open, high, low, close data; bullish/bearish color coding; visible
+          wicks
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full price range with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend defined correctly for Bullish/Bearish (code is correct)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "candlestick-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, varying volatility, different
+          wick lengths, uptrend and downtrend periods
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated stock prices with random walk behavior, plausible volatility
+          patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Prices around $85-110 are realistic for a stock, but starting at
+          exactly $100 is slightly generic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data generation → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas are imported and all are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses declarative grammar with layering (wicks + bodies), proper encoding
+          types (:T, :Q, :N), tooltips for interactivity, and scale configuration.
+          However, does not use interactive() method which is Altair's main distinctive
+          feature
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/bokeh.yaml b/plots/candlestick-basic/metadata/bokeh.yaml
index 636a183e8b..ce8d3faa21 100644
--- a/plots/candlestick-basic/metadata/bokeh.yaml
+++ b/plots/candlestick-basic/metadata/bokeh.yaml
@@ -24,3 +24,177 @@ review:
     blue/orange alternative
   - HTML output does not include hover tooltips to show OHLC values - missed opportunity
     for Bokeh interactivity
+  image_description: The plot displays a candlestick chart showing 30 trading days
+    of stock price data from January 2024 to February 2024. Green candles indicate
+    bullish days (close >= open) and red candles indicate bearish days (close < open).
+    The chart shows a clear downward trend from approximately $165 to $135 over the
+    period. Each candlestick has visible wicks (shadows) extending above and below
+    the body, representing the high-low range. The title "candlestick-basic · bokeh
+    · pyplots.ai" appears in the top-left corner. The X-axis displays dates with labels
+    like "Jan 01 2024", "Jan 08", "Jan 15", etc. The Y-axis shows "Price ($)" ranging
+    from about 130 to 170. A subtle dashed grid helps read price levels. The candles
+    are well-proportioned with appropriate spacing.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, dates are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candle bodies and wicks are perfectly sized for 30 data points, clear
+          distinction between elements
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses green (#22c55e) and red (#ef4444) which is the most common convention
+          but not ideal for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content, appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Price ($)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3) with dashed lines; no legend needed for
+          this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart with bodies and wicks
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, OHLC correctly mapped to candle components
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: OHLC representation, color-coded bullish/bearish,
+          visible wicks thinner than bodies'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full price range from ~130 to 170
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color coding is self-explanatory for candlesticks
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exactly matches "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish AND bearish candles with good variety; demonstrates
+          trend (downward), volatility, and different candle sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price scenario with realistic 2% daily volatility, plausible
+          price range ($130-$170)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Good price values, though the consistent downtrend might be slightly
+          exaggerated for a typical 30-day period
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flow: imports → data generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and segment/vbar glyphs appropriately, but
+          doesn't leverage Bokeh's interactive features (hover tooltips showing OHLC
+          values would be a great addition for the HTML version)
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/highcharts.yaml b/plots/candlestick-basic/metadata/highcharts.yaml
index 3dac8be27b..fa94525893 100644
--- a/plots/candlestick-basic/metadata/highcharts.yaml
+++ b/plots/candlestick-basic/metadata/highcharts.yaml
@@ -25,3 +25,14 @@ review:
   weaknesses:
   - Grid line alpha (0.15) could be slightly higher for better price level readability
     at this resolution
+  image_description: The plot displays a candlestick chart titled "Stock Price Movement
+    · candlestick-basic · highcharts · pyplots.ai". The chart shows 30 trading days
+    of OHLC data from October 1 to November 9, 2024. Bullish candles (close > open)
+    are rendered in Python Blue (#306998), while bearish candles (close < open) are
+    rendered in warm amber/orange (#E67E22). The Y-axis shows "Price (USD)" ranging
+    from $140 to $151 with $1 intervals. The X-axis displays dates in "Mon DD" format.
+    Wicks are clearly visible with darker tones (dark blue for bullish, reddish-brown
+    for bearish). The chart shows an initial decline from ~$150 to ~$142-143, followed
+    by choppy trading and a slight recovery at the end. Grid lines are dashed and
+    subtle. The layout is clean with good proportions.
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/letsplot.yaml b/plots/candlestick-basic/metadata/letsplot.yaml
index ed2014e46b..c99e2f525b 100644
--- a/plots/candlestick-basic/metadata/letsplot.yaml
+++ b/plots/candlestick-basic/metadata/letsplot.yaml
@@ -25,3 +25,165 @@ review:
   - X-axis label Date could be more descriptive (e.g., Trading Date or include year)
   - Grid only shows horizontal lines; subtle vertical gridlines at date markers could
     help readability
+  image_description: The plot displays a candlestick chart showing 30 trading days
+    of stock price data from January 2 to February 6. Green candlesticks represent
+    bullish days (close > open) and red candlesticks represent bearish days (close
+    < open). Each candlestick has a rectangular body showing the open-close range
+    and thin gray wicks extending to show the high-low range. The y-axis shows "Price
+    ($)" ranging from approximately 93 to 112, and the x-axis shows "Date" with labels
+    at Jan 02, Jan 09, Jan 16, Jan 23, Jan 30, and Feb 06. The title reads "candlestick-basic
+    · letsplot · pyplots.ai". The chart uses a minimal theme with subtle horizontal
+    grid lines and no vertical grid lines. The overall trend shows an initial rise
+    followed by a decline.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis date labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candlestick bodies are appropriately sized with clear wicks
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Green (#22C55E) and red (#EF4444) are distinguishable but red-green
+          is not ideal for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content, appropriate whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "Price ($)" but x-axis is generic "Date"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle but only horizontal lines; no legend needed for this
+          plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC data correctly mapped to candlestick elements
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows open, high, low, close; green/red color coding; visible wicks
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; colors are self-explanatory for candlestick charts
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "candlestick-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, varying body sizes, and different
+          wick lengths; good mix of patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated stock OHLC data with realistic random walk behavior
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price around $100 is realistic; 30 business days is appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (via SVG conversion)
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/matplotlib.yaml b/plots/candlestick-basic/metadata/matplotlib.yaml
index 1f9e424574..371207b038 100644
--- a/plots/candlestick-basic/metadata/matplotlib.yaml
+++ b/plots/candlestick-basic/metadata/matplotlib.yaml
@@ -24,3 +24,179 @@ review:
     audiences)
   - Wick linewidth (2) could be thinner (1-1.5) relative to body for better visual
     distinction per spec note
+  image_description: The plot displays a candlestick chart showing 30 trading days
+    of stock price data from January 2 to mid-February 2024. Green/teal candlesticks
+    indicate bullish (price increase) days while red candlesticks indicate bearish
+    (price decrease) days. Each candlestick clearly shows the body (open-close range)
+    and wicks/shadows (high-low range). The price ranges from approximately $130 to
+    $165, showing an initial uptrend peaking around Jan 10-15 followed by a downtrend.
+    The title correctly displays "candlestick-basic · matplotlib · pyplots.ai". The
+    x-axis shows dates formatted as "Jan 01", "Jan 08", etc. with weekly major ticks,
+    and the y-axis shows "Price ($)". A subtle dashed grid helps read price levels.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels rotated 45° to avoid
+          collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks are clearly visible with good width and distinct wicks,
+          though wick linewidth could be slightly thinner relative to body for more
+          contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green (#26a69a) and red (#ef5350) have good contrast and are distinguishable
+          for most colorblind types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate padding on all sides
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Price ($)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3 for y, 0.15 for x), but no legend explaining
+          green=bullish, red=bearish
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC data correctly mapped to candlestick elements
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: OHLC display, color coding for up/down,
+          visible wicks thinner than body, date formatting, subtle grid'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full price range with 10% padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this basic implementation (colors are self-explanatory
+          in financial context)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "candlestick-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, various body sizes, visible
+          wicks; good mix but data happens to show more of a downtrend pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price starting at $150 with ~2% daily volatility is realistic;
+          business days correctly used
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price values realistic, though 30 days is on the shorter end of the
+          20-100 range specified
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.dates, matplotlib.pyplot, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib primitives (Rectangle, plot) to build candlesticks
+          manually; functional but doesn't leverage mplfinance or other specialized
+          tools; appropriate for "basic" spec though
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/plotly.yaml b/plots/candlestick-basic/metadata/plotly.yaml
index f2d89b8297..d7c4b09de4 100644
--- a/plots/candlestick-basic/metadata/plotly.yaml
+++ b/plots/candlestick-basic/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
     OHLC values
   - Grid visibility is good but could add annotation or subtitle explaining color
     meaning for first-time viewers
+  image_description: The plot displays a candlestick chart showing 30 trading days
+    of stock price data from January 2 to February 11, 2024. The chart uses teal/green
+    (#26A69A) for bullish (up) candles and red (#EF5350) for bearish (down) candles.
+    Each candlestick clearly shows the body (open-close range) and wicks (high-low
+    range). The price starts around $100 and shows realistic market movement with
+    an initial uptrend to ~$109, followed by a gradual decline to the $100-102 range.
+    The title "candlestick-basic · plotly · pyplots.ai" is centered at the top. X-axis
+    shows "Date" with appropriate date labels, Y-axis shows "Price ($)" ranging from
+    96 to 112. A subtle gray grid helps read price levels. The layout is clean with
+    a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; date labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candlesticks are perfectly sized for 30 data points; bodies and wicks
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Teal/red scheme is better than pure green/red for colorblind users,
+          but not ideal
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, appropriate margins, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Price ($)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid at alpha 0.3; no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, OHLC correctly mapped to candlesticks
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All OHLC elements present, color coding for up/down, visible wicks
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full price range with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series candlestick (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "candlestick-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, various body sizes and wick
+          lengths; good mix of trends
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated stock price data is plausible but generic (not tied to
+          a real-world scenario name)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock price starting at $100 with 2-3% daily volatility is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Candlestick which is appropriate, but doesn't leverage Plotly's
+          interactive features like hover customization or range selector
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/plotnine.yaml b/plots/candlestick-basic/metadata/plotnine.yaml
index 7fb3875dcf..3aa7e8a6d6 100644
--- a/plots/candlestick-basic/metadata/plotnine.yaml
+++ b/plots/candlestick-basic/metadata/plotnine.yaml
@@ -23,3 +23,174 @@ review:
   - Green/red color scheme is standard for finance but not colorblind-accessible;
     consider using blue/orange or shapes
   - X-axis shows numeric Trading Day instead of actual dates which would be more informative
+  image_description: 'The plot displays a candlestick chart with 30 trading days of
+    simulated stock data. Green (teal, #22ab94) candles represent bullish days (close
+    >= open) and red (#f23645) candles represent bearish days (close < open). Each
+    candlestick has a rectangular body showing the open-close range and thin black
+    wicks showing the high-low range. The x-axis shows "Trading Day" (0-30), the y-axis
+    shows "Price ($)" ranging approximately from 132 to 155. The title "candlestick-basic
+    · plotnine · pyplots.ai" is displayed at the top. The background is minimal with
+    subtle gray gridlines. The overall trend shows a price decline from ~150 to ~135
+    over the 30-day period.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks are well-sized and clearly visible; some very small
+          body candles (doji-like) are a bit hard to see but this is realistic
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses green/red which is common for finance but not ideal for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)" includes units, "Trading Day" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and helpful; no legend shown but guide=None is intentional
+          since colors are self-explanatory for candlesticks
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart with OHLC data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, price on Y, OHLC correctly mapped to candle components
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows body (open-close), wicks (high-low), color coding for direction
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden as colors are standard financial convention
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "candlestick-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, varying body sizes, clear
+          wicks; good mix but could have more dramatic movements
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price simulation is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price around $135-155 is realistic for stocks; the downward trend
+          is perhaps slightly monotonic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses plotnine's grammar of graphics (ggplot + geom_segment + geom_rect)
+          but this is a workaround approach rather than leveraging plotnine's statistical
+          transformations or faceting capabilities
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/pygal.yaml b/plots/candlestick-basic/metadata/pygal.yaml
index 6038346d36..66531cf4dd 100644
--- a/plots/candlestick-basic/metadata/pygal.yaml
+++ b/plots/candlestick-basic/metadata/pygal.yaml
@@ -23,3 +23,155 @@ review:
   weaknesses:
   - Wicks could be slightly thicker for better visibility at smaller display sizes
   - Bottom legend creates extra whitespace below the chart
+  image_description: The plot displays a candlestick chart showing 30 trading days
+    of stock price data. Green candlesticks indicate bullish (up) days where close
+    > open, while red/coral candlesticks indicate bearish (down) days. Each candlestick
+    has a body showing the open-close range and wicks (thin lines) extending to show
+    the high-low range. The price axis ranges from approximately 128 to 172 dollars,
+    showing a general downward trend over the period. The title "candlestick-basic
+    · pygal · pyplots.ai" appears at the top, with "Trading Day" on the x-axis and
+    "Price ($)" on the y-axis. A legend at the bottom identifies "Bullish (Up)" in
+    green and "Bearish (Down)" in red.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          large canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks are well-sized with clearly visible bodies and wicks;
+          wicks could be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green (#22A06B) and red (#EF4444) have good contrast and are distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate margins and spacing
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)" includes units; "Trading Day" is descriptive'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart implementation using XY lines for bodies
+          and wicks
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC data correctly mapped to candlestick elements
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: OHLC display, color coding for up/down,
+          visible wicks'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full price range with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Bullish (Up)" and "Bearish (Down)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "candlestick-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles with varying sizes; good mix
+          of trend patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic stock price scenario with plausible volatility and price
+          movements
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price range ~$130-170 is realistic; could have a cleaner starting
+          price
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/candlestick-basic/metadata/seaborn.yaml b/plots/candlestick-basic/metadata/seaborn.yaml
index f4ddc376d0..968374a21c 100644
--- a/plots/candlestick-basic/metadata/seaborn.yaml
+++ b/plots/candlestick-basic/metadata/seaborn.yaml
@@ -23,3 +23,172 @@ review:
   - Code specifies colorblind-safe blue/orange colors but plot renders green/red (color
     variable assignment mismatch)
   - Structure uses for loops and functions rather than pure KISS linear style
+  image_description: The plot displays a candlestick chart showing 30 trading days
+    of stock price data from January 2 to approximately February 10, 2024. The chart
+    uses **green** for bullish (up) candles and **red** for bearish (down) candles
+    - this is the traditional green/red color scheme, NOT the colorblind-safe blue/orange
+    mentioned in the code. The y-axis shows "Price ($)" ranging from approximately
+    133 to 160. The x-axis shows "Date" with tick labels at 5-day intervals (Jan 02,
+    Jan 09, Jan 16, Jan 23, Jan 30, Feb 06). The title correctly displays "candlestick-basic
+    · seaborn · pyplots.ai". A legend in the upper left shows "Bullish (Up)" in green
+    and "Bearish (Down)" in red. The data shows a general downward trend from ~$150
+    to ~$135 with both bullish and bearish periods clearly visible. Wicks (shadows)
+    are clearly visible and thinner than the candle bodies. A subtle horizontal grid
+    helps read price levels.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candlesticks well-sized, wicks visible, bodies clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses green/red (not colorblind-safe despite code comment) - deducted
+          2 points
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, appropriate padding
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)" with units, "Date" descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend uses green/red inconsistent with code's blue/orange intent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC data correctly mapped to candle elements
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Body shows open-close, wicks show high-low, color distinguishes direction
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all price data with padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies bullish/bearish
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "candlestick-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, varying body sizes, visible
+          wicks, doji-like small bodies
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Stock price simulation is plausible, shows realistic downward trend
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Price range $133-160 is realistic for stock data
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: true
+        comment: Uses for loop with functions, not pure KISS style
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_theme, sns.set_context, sns.lineplot for wicks, but
+          candlestick is not a native seaborn chart type so relies on matplotlib primitives
+  verdict: APPROVED
diff --git a/plots/candlestick-volume/metadata/altair.yaml b/plots/candlestick-volume/metadata/altair.yaml
index fdfb9623f5..88d840adaf 100644
--- a/plots/candlestick-volume/metadata/altair.yaml
+++ b/plots/candlestick-volume/metadata/altair.yaml
@@ -28,3 +28,177 @@ review:
   - Missing crosshair/cursor that spans both panes as specified in requirements
   - No .interactive() call - misses Altair key interactivity strength for financial
     charts
+  image_description: The plot displays a professional dual-pane candlestick chart
+    with volume. The upper pane (roughly 75% of height) shows 60 days of OHLC candlesticks
+    with blue bars for bullish days (close >= open) and yellow/gold bars for bearish
+    days. Clear wicks extend from each candle body showing high-low range. Price axis
+    labeled "Price ($)" ranges from ~$110 to $154, showing a general downtrend from
+    $150 to $115 over the period. The lower pane (~25% height) displays volume bars
+    using the identical blue/yellow color scheme for visual consistency. Volume axis
+    labeled "Volume" with SI notation (2M, 4M, etc.) up to 12M. Both panes share a
+    synchronized x-axis showing dates from early January through late March 2024.
+    The title "candlestick-volume · altair · pyplots.ai" appears centered at the top
+    with proper formatting.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 20pt, tick labels 16pt - all clearly readable
+          but tick labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks and volume bars well-sized, but some narrow candles
+          on volatile days could be slightly thicker
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow scheme is colorblind-safe and provides excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: 75/25 split between price and volume panes as specified, good canvas
+          utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)" with units, "Volume" and "Date" descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No visible grid lines (spec asks for subtle grid), no legend shown
+          (color scheme is intuitive, but a legend was created and never displayed)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick + volume dual-pane chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, OHLC correctly mapped to candles, volume to bars
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Dual-pane layout ✓, shared x-axis ✓, color-coded ✓, tooltips ✓ -
+          Missing: crosshair/cursor spanning both panes per spec'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes properly scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend created in code but not actually displayed in final chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "candlestick-volume · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both bullish AND bearish candles, varying volume, price trend
+          with volatility
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price data with realistic daily movements, appropriate volume
+          levels
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Price ~$110-154 realistic for stocks, volume in millions appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → chart creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct), but the legend chart is
+          created and then left out of the final output
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of vconcat for dual pane, layering for candlesticks, tooltips
+          - but no interactivity (.interactive()) which is Altair's key strength
+  verdict: APPROVED
diff --git a/plots/candlestick-volume/metadata/bokeh.yaml b/plots/candlestick-volume/metadata/bokeh.yaml
index c909470feb..ea0a6dd1ca 100644
--- a/plots/candlestick-volume/metadata/bokeh.yaml
+++ b/plots/candlestick-volume/metadata/bokeh.yaml
@@ -25,3 +25,172 @@ review:
   - Volume bar colors appear more muted compared to candlesticks - consider matching
     fill_alpha
   - Missing HoverTool which would enhance Bokeh interactive capabilities
+  image_description: The plot displays a professional dual-pane candlestick chart
+    with volume. The upper pane (~70% height) shows OHLC candlesticks spanning 60
+    trading days from June to August 2024, with Python Blue (#306998) for bullish
+    (up) days and Yellow (#FFD43B) for bearish (down) days. The wicks (high-low lines)
+    and bodies are clearly visible. The price ranges from approximately $125 to $165.
+    The lower pane (~30% height) displays volume bars using the same color scheme,
+    with volume ranging from 0 to ~4.5 million. Both panes share a common x-axis with
+    rotated date labels. The title "candlestick-volume · bokeh · pyplots.ai" appears
+    at the top left. Grid lines are subtle with dashed styling. Y-axes are labeled
+    "Price ($)" and "Volume" with the x-axis labeled "Date".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 18-22pt all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-axis labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks well-sized, wicks visible, slight deduction for thin
+          wicks
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/Yellow is colorblind-safe but not the most distinctive pair
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent 70/30 split, good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)", "Volume", "Date" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but grid could be slightly more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick + volume chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC correctly mapped to candlesticks, volume to bars
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Dual pane, shared x-axis, color-coded volume bars, crosshair tools
+          added
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this chart type (colors self-explanatory)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "candlestick-volume · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, volume variation, overall
+          downtrend with rallies
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price data from $125-165 with realistic 2M base volume
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Good price range, volume slightly high for some stocks but plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct) but also plot.html unnecessarily
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, CrosshairTool, linked x-range, vbar/segment.
+          Could leverage HoverTool for tooltips
+  verdict: APPROVED
diff --git a/plots/candlestick-volume/metadata/highcharts.yaml b/plots/candlestick-volume/metadata/highcharts.yaml
index 1bdbca067d..085e841d4a 100644
--- a/plots/candlestick-volume/metadata/highcharts.yaml
+++ b/plots/candlestick-volume/metadata/highcharts.yaml
@@ -24,3 +24,166 @@ review:
   - Blue/red color scheme could be improved for colorblind accessibility (consider
     blue/orange instead)
   - X-axis date labels appear clipped/truncated at bottom of image
+  image_description: The plot displays a professional stock candlestick chart with
+    a synchronized volume pane below. The upper pane (approximately 70% of height)
+    shows OHLC candlesticks over 60 trading days, with blue candles representing bullish
+    (up) days and red candles representing bearish (down) days. Price ranges from
+    approximately $118 to $170, showing an overall downtrend. The lower pane (approximately
+    25% of height) displays volume bars in matching colors - blue for up days and
+    red for down days. Volumes range from about 2M to 10M units. The title "candlestick-volume
+    · highcharts · pyplots.ai" appears at the top. Y-axis labels show "Price (USD)"
+    and "Volume" with proper formatting ($XXX and X.XM). X-axis shows date labels
+    (day numbers). Grid lines are subtle gray. A crosshair is configured for precise
+    reading.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, axis labels at 28px, tick labels at 24px - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candlesticks and volume bars are well-sized for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue/red which is distinguishable but not ideal for colorblind
+          users (3/5)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good 70/25 split, proper canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price (USD)" and "Volume" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle, but no legend present (series names in tooltip only)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart with volume bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC data correctly mapped to candlesticks, volume to bars
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Dual-pane layout, shared x-axis, color-matched volume bars, crosshair
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 60 trading days visible with appropriate price/volume range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Series named "Stock Price" and "Volume"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"candlestick-volume · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, varied volumes, trending
+          data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price data with realistic daily movements and gaps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are realistic but the downtrend is quite pronounced; volumes
+          correlate with price moves
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highstock API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/candlestick-volume/metadata/letsplot.yaml b/plots/candlestick-volume/metadata/letsplot.yaml
index d4a4d4322d..0ec573cae3 100644
--- a/plots/candlestick-volume/metadata/letsplot.yaml
+++ b/plots/candlestick-volume/metadata/letsplot.yaml
@@ -24,3 +24,175 @@ review:
   - Crosshair cursor mentioned in comments but only available in HTML output, not
     PNG
   - Minor visual gap between the two panes could be tighter
+  image_description: The plot displays a professional dual-pane candlestick chart
+    with synchronized volume bars. The upper pane (~70% height) shows OHLC candlesticks
+    with blue color for "Up Day" and orange for "Down Day", depicting a stock declining
+    from ~$165 to ~$127 over 60 trading days (Jan-Mar 2024). Wicks are visible extending
+    from candlestick bodies. The lower pane (~30% height) shows corresponding volume
+    bars using the same color scheme, with human-readable y-axis labels (3.9M, 6.1M,
+    8.3M). The title "candlestick-volume · letsplot · pyplots.ai" appears at top,
+    with a horizontal legend showing "Direction" with Up Day/Down Day entries. Both
+    panes share a common x-axis showing dates. Grid lines are subtle gray. The layout
+    is well-balanced with good canvas utilization.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title ~24pt, axis labels ~20pt, tick marks ~16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, date labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks are visible with appropriate sizing; wicks could be
+          slightly thicker for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#0077BB) and orange (#EE7733) are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good 70/30 split as specified; slight gap between panes but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Price ($)" and "Volume (shares)" with units, "Date (2024)" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle, but legend positioning at top overlaps with data
+          area conceptually; minor grid alignment issue between panes
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart with volume bars in dual-pane layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, OHLC prices correctly mapped, volume on lower pane
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has shared x-axis, color-coded volume, proper pane ratio; no crosshair
+          cursor in PNG (only available in HTML)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Direction" legend with "Up Day"/"Down Day" is accurate'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"candlestick-volume · letsplot · pyplots.ai" is correct'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both bullish (blue) and bearish (orange) candles, varying volume,
+          overall downtrend with some recovery; good mix but could show more dramatic
+          reversals
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price data with realistic volatility and volume correlation
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Prices ($127-$165) and volumes (4M-8M) are realistic; volume format
+          is human-readable
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Has helper function `format_volume()` which breaks KISS principle
+          (no functions/classes rule)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern gggrid instead of deprecated GGBunch
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses gggrid for multi-panel layout, ggplot2 grammar, scale_color_manual,
+          theme customization, ggsave with scale parameter
+  verdict: APPROVED
diff --git a/plots/candlestick-volume/metadata/matplotlib.yaml b/plots/candlestick-volume/metadata/matplotlib.yaml
index 5bdd629a86..9e06a33397 100644
--- a/plots/candlestick-volume/metadata/matplotlib.yaml
+++ b/plots/candlestick-volume/metadata/matplotlib.yaml
@@ -22,3 +22,163 @@ review:
   - Missing crosshair/cursor spanning both panes as specified in requirements
   - Candlestick wicks are thin (linewidth=1.5) - could be slightly thicker for better
     visibility at scale
+  image_description: The plot displays a professional dual-pane candlestick chart
+    with volume. The upper pane (~75% height) shows OHLC candlesticks over 60 trading
+    days from January to late March 2024. Up days (Close ≥ Open) use Python Blue (#306998),
+    down days use Python Yellow (#FFD43B). Price ranges from ~$120 to ~$165 with a
+    clear downtrend visible. The lower pane (~25% height) shows corresponding volume
+    bars in matching colors, ranging from 0 to ~10M shares. Both panes share a common
+    x-axis with date labels (Jan 01, Jan 15, etc.) rotated 45°. The title "candlestick-volume
+    · matplotlib · pyplots.ai" is prominently displayed. A legend in the upper left
+    explains the color coding. Subtle dashed grid lines (alpha=0.3) appear in both
+    panes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, date labels properly rotated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Candlesticks clearly visible, wicks slightly thin but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: 75%/25% split as specified, good margins, efficient canvas use
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Price ($)" has units, "Volume" and "Date" are descriptive but lack
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.3), legend well-placed upper left
+    spec_compliance:
+      score: 22
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick chart with volume pane
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC correctly mapped, dates on x-axis
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Missing crosshair/cursor spanning both panes as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 60 days visible, y-axes appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly explains up/down coloring
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "candlestick-volume · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, varied wicks, volume variation
+          on big moves
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price data with realistic OHLC patterns, ~$150 base price
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic stock prices ($120-165), volumes in millions (4-10M)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, Patch)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/candlestick-volume/metadata/plotly.yaml b/plots/candlestick-volume/metadata/plotly.yaml
index 9a793186ff..f1830eec23 100644
--- a/plots/candlestick-volume/metadata/plotly.yaml
+++ b/plots/candlestick-volume/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Volume axis label lacks units (should be "Volume (shares)" or similar)
   - No legend or annotation explaining that blue=up day, yellow=down day
+  image_description: The plot displays a professional dual-pane candlestick chart
+    with volume data spanning 60 trading days (January-March 2024). The upper pane
+    (~75% height) shows OHLC candlesticks with prices ranging from ~$120 to ~$165,
+    using blue (#306998) for bullish days and yellow/gold (#FFD43B) for bearish days.
+    The lower pane (~25% height) displays corresponding volume bars with matching
+    colors. The title "candlestick-volume · plotly · pyplots.ai" is properly centered
+    at the top. Axis labels include "Price ($)" with dollar formatting and "Volume"
+    with abbreviated notation (M for millions). The shared x-axis shows "Date" with
+    clear date labels. Grid lines are subtle (rgba with 0.2 alpha) and aligned across
+    both panes.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at size 32, labels at 22, ticks at 18 - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, date labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Candlesticks and volume bars appropriately sized for 60 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow scheme is colorblind-friendly, though could use higher
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: 75/25 split as specified, good margins, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Price has units ($), Volume lacks units (shares/units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (0.2 alpha), but no legend explaining the color scheme
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct candlestick with volume dual-pane layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: OHLC correctly mapped, dates on x-axis, volume synchronized
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shared x-axis, matching colors, crosshair/spikes enabled, 75/25 split
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (colors are self-explanatory in candlestick
+          context)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "candlestick-volume · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both bullish and bearish candles, volume variation, clear downtrend
+          with recovery
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic stock price simulation (~$150 starting price, reasonable
+          volatility)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price values realistic; volume at 5M base is plausible but on higher
+          end for typical stocks
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, plotly.graph_objects, make_subplots)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of make_subplots with shared_xaxes, go.Candlestick,
+          unified hover mode, spike lines for crosshair, proper HTML export for interactivity
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/altair.yaml b/plots/cat-box-strip/metadata/altair.yaml
index 5b4a1cd9f0..c02bdac625 100644
--- a/plots/cat-box-strip/metadata/altair.yaml
+++ b/plots/cat-box-strip/metadata/altair.yaml
@@ -22,3 +22,177 @@ review:
   weaknesses:
   - Missing legend to explain box plot vs strip point representation
   - No tooltips for interactivity (Altair strength not fully utilized)
+  image_description: 'The plot displays a combined box plot with strip overlay showing
+    "Quality Score" (y-axis, ranging from ~20-110) across four departments: Engineering,
+    Marketing, Sales, and Support (x-axis). Each department shows a blue (#306998)
+    box plot with quartile boxes, whiskers, and a distinctive yellow/gold median line.
+    Individual data points are overlaid as semi-transparent blue circles with horizontal
+    jitter to reduce overlap. Engineering shows a tight, high-scoring distribution
+    (~75-95). Marketing has a wider spread (~40-98) with visible outliers. Sales shows
+    lower scores (~35-95) with notable outliers. Support displays a bimodal-like pattern
+    (~47-90). The title "cat-box-strip · altair · pyplots.ai" appears at the top in
+    appropriate font size. Grid lines are subtle with low opacity. The plot fills
+    the canvas well with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; department labels are well-spaced with labelAngle=0
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Box plots and strip points are visible; jitter works well; point
+          size could be slightly larger for ~150 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme with yellow median; no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization; plot fills space appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Department", "Quality Score") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but no legend present to identify what
+          points vs boxes represent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct combined box plot with strip overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical (Department) on X, numeric (Quality Score) on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median/quartiles/whiskers; strip points overlay with jitter
+          and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis scale [25, 105] shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color visualization
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-box-strip · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: tight distribution (Engineering), wide spread
+          with outliers (Marketing), lower scores with outliers (Sales), bimodal pattern
+          (Support)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product quality scores across departments is a plausible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 30-100 are realistic; minor issue with scale going to 105
+          when max is ~100
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct for Altair)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layering, transform_calculate for jitter, mark_boxplot
+          with customization; could have added tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/bokeh.yaml b/plots/cat-box-strip/metadata/bokeh.yaml
index 282496f70a..07ad3f8800 100644
--- a/plots/cat-box-strip/metadata/bokeh.yaml
+++ b/plots/cat-box-strip/metadata/bokeh.yaml
@@ -25,3 +25,175 @@ review:
   - Could add HoverTool to show exact values on hover for the HTML version
   - The median line segment has zero width (x0=x1) which works but is technically
     a point not a line
+  image_description: The plot displays a box plot with strip overlay showing plant
+    growth (cm) across four soil types (Sandy, Clay, Loamy, Silty). The boxes are
+    rendered in a muted blue color (#306998) with 40% transparency, showing median
+    lines as darker horizontal segments. Yellow/gold circular markers (#FFD43B) with
+    dark gold borders represent individual data points, scattered with jitter across
+    each category. The title "cat-box-strip · bokeh · pyplots.ai" appears at the top
+    left in dark text. The x-axis is labeled "Soil Type" and y-axis "Plant Growth
+    (cm)". The background is a subtle off-white (#fafafa) with light gray dashed horizontal
+    grid lines. Whiskers extend from boxes to show data range, with visible outliers
+    in Clay (high values ~55-58) and Silty (low values ~12-14).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt/22pt, all clearly readable at
+          full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Strip points visible with good jitter and alpha, box elements clear.
+          Minor: some points cluster tightly in Loamy'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue boxes and yellow points provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight excess whitespace on right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Plant Growth (cm)", "Soil Type"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), but no legend present (though not strictly
+          required for this plot type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot with strip overlay combination
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, numeric values on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median, Q1, Q3; whiskers show range; strip points overlay
+          with jitter and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-box-strip · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows different distributions (narrow Sandy, wide Clay),
+          outliers (Clay high, Silty low), varying sample sizes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: 'Plant growth by soil type is plausible and neutral. Minor: values
+          slightly generic'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Plant growth 10-60cm is realistic for the scenario
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png but also plot.html (minor extra file)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, Whisker annotations, jitter transform.
+          Could leverage more Bokeh-specific features like HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/highcharts.yaml b/plots/cat-box-strip/metadata/highcharts.yaml
index 292aebe149..5dbcc45d91 100644
--- a/plots/cat-box-strip/metadata/highcharts.yaml
+++ b/plots/cat-box-strip/metadata/highcharts.yaml
@@ -26,3 +26,176 @@ review:
   - Some minor empty space at the top of the chart could be reduced
   - Median line (yellow) blends slightly with the strip points (also yellow) - different
     styling could improve distinction
+  image_description: 'The plot displays a box plot with strip overlay showing plant
+    growth (cm) across four light conditions: Low Light, Medium Light, High Light,
+    and Full Sun. The box plots are rendered in blue (#306998) with semi-transparent
+    fill and yellow (#FFD43B) median lines. Individual data points are overlaid as
+    yellow circles with jitter for visibility. The title "cat-box-strip · highcharts
+    · pyplots.ai" appears at the top in bold. The y-axis shows "Plant Growth (cm)"
+    ranging from ~2 to 64, and the x-axis shows the four light condition categories.
+    A legend in the top-right corner distinguishes "Distribution Statistics" (blue)
+    from "Individual Measurements" (yellow). The data shows increasing plant growth
+    with more light exposure, with Full Sun having visible outliers both low (~20-22
+    cm) and high (~58 cm).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels slightly
+          small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and boxes well-sized; some strip points slightly overlap
+          but jitter helps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; slight empty space at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Plant Growth (cm)"; X-axis has descriptive "Light
+          Condition"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend placement good but could be closer to plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: box plot with strip overlay'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, numeric values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median, Q1, Q3, whiskers; strip points overlay with jitter
+          and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-box-strip · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions, outliers, varying sample sizes; could
+          show more spread variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth under different light conditions is a realistic, neutral
+          scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for plant growth in cm; some outliers at 58-60
+          cm are plausible but high
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Using older pattern; could use more modern highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses BoxPlotSeries and ScatterSeries overlay; could leverage more
+          Highcharts-specific interactivity features in the HTML output
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/letsplot.yaml b/plots/cat-box-strip/metadata/letsplot.yaml
index c4c1234fb8..68be0b9207 100644
--- a/plots/cat-box-strip/metadata/letsplot.yaml
+++ b/plots/cat-box-strip/metadata/letsplot.yaml
@@ -25,3 +25,181 @@ review:
   - Output path uses __file__ which may cause issues in some execution contexts
   - Could leverage lets-plot interactive features for HTML output (tooltips showing
     exact values)
+  image_description: The plot displays a box plot with strip overlay showing Performance
+    Score (y-axis, range 35-100) across four departments (Engineering, Marketing,
+    Sales, Support) on the x-axis. Each department has a blue (#306998) box showing
+    quartiles with a dark median line, and yellow (#FFD43B) jittered points overlaid
+    on top representing individual data points. The title "cat-box-strip · letsplot
+    · pyplots.ai" is displayed in bold at the top. The background is clean with minimal
+    grid (only horizontal lines), and text is clearly legible. Engineering shows high
+    performers (~95-97) and one low outlier (~42). Marketing has wider spread. Sales
+    has two low outliers (~35-38). Support shows a tighter distribution.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~24pt), axis labels are clearly readable
+          (~20pt), tick labels are appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=4) with good alpha (0.7), jitter prevents
+          most overlap. Minor deduction as some points in dense areas slightly overlap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills appropriate area. Minor whitespace
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Department", "Performance Score") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with only horizontal lines (good), but no legend needed
+          for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: box plot with strip overlay'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical (Department) on X, numeric (Performance Score) on Y -
+          correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: box shows median/Q1/Q3, whiskers show
+          range, strip points overlay with jitter and transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 35 to 100, capturing all data including
+          outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series box+strip, correctly omitted
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "cat-box-strip · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (Engineering high, Marketing wide,
+          Support tight), outliers visible in multiple groups. Could show more dramatic
+          differences between groups
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Performance scores across departments is a real, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Scores 35-100 are realistic for performance metrics. Minor: some
+          outliers at 35-42 are quite low for "performance scores"'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (os, numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to implementation directory via __file__ rather than current
+          working directory
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar (ggplot + geom_boxplot + geom_jitter), theme_minimal(),
+          element_text/element_blank for styling. Could leverage more lets-plot specific
+          features like tooltips or interactive elements
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/matplotlib.yaml b/plots/cat-box-strip/metadata/matplotlib.yaml
index 3598646871..8b737d8a71 100644
--- a/plots/cat-box-strip/metadata/matplotlib.yaml
+++ b/plots/cat-box-strip/metadata/matplotlib.yaml
@@ -24,3 +24,172 @@ review:
   - Axis labels could include units (e.g., "Response Value (units)" or "Score")
   - Could use matplotlib-specific features like custom boxplot statistics or violinplot
     overlay for more library-distinctive implementation
+  image_description: The plot displays a box plot with strip overlay showing four
+    treatment groups (Control, Treatment A, Treatment B, Treatment C) on the x-axis
+    and Response Value (0-100) on the y-axis. Each group has a light blue semi-transparent
+    box with a yellow/gold median line. Individual data points are overlaid as blue
+    circles with white edges and jitter applied for visibility. Treatment B notably
+    shows outliers at both high (~80-82) and low (~15) values. The title uses the
+    correct format "cat-box-strip · matplotlib · pyplots.ai". Grid lines are subtle
+    dashed lines on the y-axis only. All text is clearly readable with appropriate
+    font sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers s=100 appropriate for 30-45 points per group, alpha=0.6 good
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme (#306998), no color differentiation needed
+          between groups
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units ("Response Value" could have units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, but no legend present (not strictly needed
+          for this plot type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot with strip overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, numeric values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median/Q1/Q3, whiskers show range, strip points with jitter
+          and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100 shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color strip overlay
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "cat-box-strip · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent! Shows different distributions (normal, bimodal), different
+          spreads, outliers in Treatment B, varying sample sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Treatment/control groups is plausible scientific scenario, generic
+          enough
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 0-100 are sensible for a response metric
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses tick_labels (not deprecated labels)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic matplotlib usage, no distinctive features like PathCollection
+          properties, custom colormaps, or advanced boxplot statistics
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/plotly.yaml b/plots/cat-box-strip/metadata/plotly.yaml
index 1327a27029..bc91ceed47 100644
--- a/plots/cat-box-strip/metadata/plotly.yaml
+++ b/plots/cat-box-strip/metadata/plotly.yaml
@@ -28,3 +28,172 @@ review:
   - Could use Plotly native boxpoints parameter instead of manually adding scatter
     traces for a more elegant solution
   - Grid alpha is perhaps too subtle, making it hard to read precise values
+  image_description: The plot displays a box-and-strip combination chart with four
+    categories (Method A, B, C, D) on the x-axis and "Performance Score" (ranging
+    from ~25-95) on the y-axis. Each category has a blue-outlined box plot with light
+    blue fill showing quartiles and median, with whiskers extending to show range.
+    Yellow circular markers with blue outlines are overlaid on each box as jittered
+    strip points, showing individual data values with transparency. The title "cat-box-strip
+    · plotly · pyplots.ai" is centered at the top. The background is white with subtle
+    horizontal gridlines. Method B shows higher, tighter scores; Method C shows lower
+    scores with clear outliers extending down to ~25; Method D shows a wider spread
+    suggesting bimodal distribution.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Strip points are visible with good jitter and opacity (0.7), though
+          slightly smaller markers might reduce overlap in dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue boxes with yellow points provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins (l=80, r=50, t=100,
+          b=80)
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Training Method", "Performance Score") but
+          lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), no legend needed as colors are self-explanatory,
+          but grid could be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot with strip overlay combination
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly on X-axis, numeric values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median, Q1, Q3; whiskers show range; strip points overlay
+          with jitter and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis accommodates all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, colors consistent across elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-box-strip · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows different distribution shapes (normal, tight, with outliers,
+          bimodal), demonstrates sample size variation; minor: bimodal nature of Method
+          D not strongly visible'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Training method performance comparison is a plausible, neutral educational/business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores in 25-95 range are reasonable for performance metrics, though
+          could benefit from clearer context
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for data and seed(123) for jitter
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, plotly.graph_objects - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html which is correct for plotly
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/plotnine.yaml b/plots/cat-box-strip/metadata/plotnine.yaml
index ef9022c2e7..2968790285 100644
--- a/plots/cat-box-strip/metadata/plotnine.yaml
+++ b/plots/cat-box-strip/metadata/plotnine.yaml
@@ -23,3 +23,174 @@ review:
   weaknesses:
   - Grid is barely visible (nearly invisible), could use slightly higher alpha for
     better reference
+  image_description: 'The plot displays a box plot with strip overlay showing plant
+    growth (cm) across four treatment groups: Control (blue), Fertilizer A (yellow),
+    Fertilizer B (light blue), and Fertilizer C (green). Each category has a colored
+    box showing median, quartiles, and whiskers, with individual data points (jittered)
+    overlaid on top. The title "cat-box-strip · plotnine · pyplots.ai" appears at
+    the top. X-axis is labeled "Treatment Group" and Y-axis is labeled "Plant Growth
+    (cm)". A legend on the right identifies each fertilizer type. The Control group
+    shows lower values (~15-35 cm), Fertilizer A shows moderate improvement (~25-45
+    cm), Fertilizer B shows tighter distribution (~38-52 cm), and Fertilizer C shows
+    highest values with some outliers (~27-55 cm).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend all clearly readable
+          at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, categories well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Boxes and jittered points are clearly visible, good alpha transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, light blue, green) are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Plant Growth (cm)" includes units, "Treatment Group" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is very subtle/nearly invisible, legend is well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct box plot with strip overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical on X, numeric on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median/quartiles/whiskers, strip points with jitter and
+          transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four treatment groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "cat-box-strip · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions, spreads, and outliers; Fertilizer
+          C shows bimodal behavior with underperformers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth/fertilizer experiment is a plausible, neutral scientific
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Plant growth values (15-55 cm) are reasonable, though actual fertilizer
+          trials might show less dramatic differences
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of ggplot2 grammar: geom_boxplot + geom_jitter layering,
+          scale_fill_manual/scale_color_manual, theme_minimal with custom element_text
+          sizing'
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/pygal.yaml b/plots/cat-box-strip/metadata/pygal.yaml
index a1741c36d8..4f53dff1db 100644
--- a/plots/cat-box-strip/metadata/pygal.yaml
+++ b/plots/cat-box-strip/metadata/pygal.yaml
@@ -23,3 +23,180 @@ review:
   weaknesses:
   - Grid only shows on y-axis, no x-axis guides (minor visual consistency issue)
   - Some strip points in dense areas still overlap slightly despite jitter
+  image_description: 'The plot displays a box plot with strip overlay showing plant
+    height (cm) across four light conditions: Full Sun (blue), Partial Shade (yellow),
+    Full Shade (green), and Artificial (orange). Each category shows a filled rectangular
+    box representing the IQR (Q1-Q3) with a darker median line inside. Vertical whiskers
+    extend from the boxes to caps indicating the data range. Individual data points
+    are overlaid on each box with slight horizontal jitter, using matching colors
+    with transparency. The title "cat-box-strip · pygal · pyplots.ai" appears at the
+    top. The x-axis is labeled "Light Condition" and y-axis is labeled "Plant Height
+    (cm)" with values ranging from 0 to 80. The layout is clean with subtle horizontal
+    grid lines on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          4800x2700 resolution with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Strip points are visible with good jitter and transparency; box elements
+          are clear; slight deduction as some points in dense areas could benefit
+          from more jitter
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, orange palette is colorblind-safe with good
+          distinction
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units ("Plant Height (cm)",
+          "Light Condition")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (acceptable as colors match category labels);
+          grid is y-axis only, which is fine
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct combined box plot with strip overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, numeric values on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median, Q1, Q3; whiskers show range; strip points overlay
+          with jitter and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range 0-80, capturing all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; colors clearly correspond to category labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-box-strip · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (Full Sun higher, Full Shade lower),
+          different spreads (Artificial has wide spread), and outliers visible beyond
+          whiskers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth under different light conditions is a plausible, neutral
+          scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Plant heights 10-72 cm are realistic; slight deduction as the range
+          could be tighter to real-world observations
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save; no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and additional seeds for jitter
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported; all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creative use of XY chart to manually construct box plot with strip
+          overlay; demonstrates pygal flexibility but doesn't use pygal.Box() native
+          feature
+  verdict: APPROVED
diff --git a/plots/cat-box-strip/metadata/seaborn.yaml b/plots/cat-box-strip/metadata/seaborn.yaml
index de94625609..ea2831ae65 100644
--- a/plots/cat-box-strip/metadata/seaborn.yaml
+++ b/plots/cat-box-strip/metadata/seaborn.yaml
@@ -24,3 +24,178 @@ review:
   - Realistic manufacturing quality scenario with appropriate value ranges
   weaknesses:
   - Grid styling could include both x and y axes for complete reference (minor)
+  image_description: 'The plot displays a combined box plot with strip overlay showing
+    quality scores across four manufacturing batches (A, B, C, D). The boxes are rendered
+    in Python Blue (#306998) with the median line visible inside each box. Yellow
+    (#FFD43B) circular data points with black edge outlines are overlaid on each box
+    using jitter for horizontal spread. The title "cat-box-strip · seaborn · pyplots.ai"
+    is prominently displayed at the top. The x-axis shows "Manufacturing Batch" with
+    four category labels, and the y-axis shows "Quality Score (points)" ranging from
+    approximately 35 to 100. A subtle dashed grid is present on the y-axis. Each batch
+    displays different distribution characteristics: Batch B shows a higher median
+    with tight spread, Batch D shows the widest spread, and Batch C shows some lower
+    outliers.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Strip points well-sized (s=10) with good alpha (0.7) and jitter (0.2),
+          boxes appropriately sized; minor deduction as some points on Batch D overlap
+          slightly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Quality Score (points)" and "Manufacturing
+          Batch"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (none needed for this plot type), grid is subtle
+          and appropriate
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct combined box plot with strip overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical variable (Batch) on X, numeric variable (Quality Score)
+          on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Box shows median, Q1, Q3, whiskers; strip points overlay with jitter
+          and transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (35-105) shows all data points including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color coding is self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-box-strip · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variation: different medians, spreads, and outliers across
+          batches'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Manufacturing quality scores is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Quality scores 40-100 are realistic for manufacturing metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of seaborn's boxplot and stripplot combined with proper
+          API (hue for palette), fliersize=0 to avoid duplicate outliers, and clean
+          DataFrame integration
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/altair.yaml b/plots/cat-strip/metadata/altair.yaml
index d7fda1203f..0b8504eed7 100644
--- a/plots/cat-strip/metadata/altair.yaml
+++ b/plots/cat-strip/metadata/altair.yaml
@@ -27,3 +27,177 @@ review:
     type better
   - Could use additional Altair features like selection for interactivity or layering
     with mean markers
+  image_description: 'The plot displays a categorical strip plot showing customer
+    ratings (1-5 stars) across 5 departments: Books, Clothing, Electronics, Home &
+    Garden, and Sports. Points are rendered as blue circles (#306998) with horizontal
+    jitter applied to reduce overlap. The y-axis ranges from 1.0 to 5.0 with the label
+    "Customer Rating (1-5 stars)". The x-axis shows "Department" with all 5 category
+    labels clearly visible and not rotated. The title "cat-strip · altair · pyplots.ai"
+    appears at the top center. Each department shows approximately 30 data points
+    with varying distributions - Books clusters tightly around 4.5, Clothing shows
+    wider spread down to ~2.2, Electronics clusters around 4.0-4.5 with some at 5.0,
+    Home & Garden shows moderate spread around 4.0, and Sports clusters tightly around
+    4.3-4.5. Subtle gray grid lines are present. The overall layout is clean with
+    good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, category labels well-spaced with labelAngle=0
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers size=200 with opacity=0.7 appropriate for 150 points, though
+          some overlap within categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no color differentiation
+          needed
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of 1600x900
+          dimensions
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Customer Rating (1-5 stars)",
+          X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but no legend present (not strictly needed
+          for single-color plot)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, numeric values on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter implemented via transform_calculate, individual points visible
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 1-5 range, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color strip plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "cat-strip · altair · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows different distributions (tight vs spread), but all categories
+          have similar point counts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product ratings across departments is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Ratings 1-5 are realistic, values appropriately clipped
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used, all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses transform_calculate for jitter, xOffset encoding, tooltips -
+          good Altair idioms but could leverage more declarative features
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/bokeh.yaml b/plots/cat-strip/metadata/bokeh.yaml
index 77f39c6032..c94c35def1 100644
--- a/plots/cat-strip/metadata/bokeh.yaml
+++ b/plots/cat-strip/metadata/bokeh.yaml
@@ -20,3 +20,174 @@ review:
   - Proper use of ColumnDataSource for data management
   weaknesses:
   - Could add HoverTool to show exact values on hover leveraging Bokeh interactivity
+  image_description: 'The plot displays a categorical strip plot with 5 manufacturing
+    batches (A through E) on the x-axis and Quality Score (ranging from approximately
+    45-97) on the y-axis. Points are rendered in a muted blue color (#306998) with
+    darker blue outlines. Each batch shows individual data points with horizontal
+    jitter applied to reduce overlap. The distributions vary: Batch C shows the highest
+    and tightest clustering (~90-97), Batch D shows the widest spread with some low
+    outliers (~47-87), Batch A and E show medium-high scores, and Batch B shows medium
+    scores with moderate spread. The title "cat-strip · bokeh · pyplots.ai" appears
+    at the top left. The background is light gray (#fafafa) with subtle dashed horizontal
+    grid lines. The Bokeh toolbar is visible in the top right corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all clearly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size=25 with alpha=0.7 work well for 125 points, though
+          slightly on the larger side
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills appropriate area, minor issue with
+          toolbar placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Manufacturing Batch" and "Quality Score" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Subtle dashed y-grid at 0.3 alpha is good, but no legend present
+          (though not strictly needed for single-color plot)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numeric values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, categories clearly separated
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of data (~45-97)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color categorical strip
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-strip · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied distributions (tight vs spread, high vs low means),
+          includes outliers in Batch D
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Manufacturing quality scores is a plausible, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Quality scores 45-97 are reasonable, though some values slightly
+          below typical QC thresholds
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses jitter transform and ColumnDataSource which are Bokeh-specific,
+          but could leverage more Bokeh features like HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/highcharts.yaml b/plots/cat-strip/metadata/highcharts.yaml
index f0fe5ca22a..01185b10f3 100644
--- a/plots/cat-strip/metadata/highcharts.yaml
+++ b/plots/cat-strip/metadata/highcharts.yaml
@@ -23,3 +23,179 @@ review:
   - Legend position in top-right corner encroaches on the data visualization area
   - Points lack alpha/transparency which would help visualize density in overlapping
     areas
+  image_description: 'The plot displays a categorical strip plot showing plant height
+    measurements (cm) across four treatment groups: Control, Fertilizer A, Fertilizer
+    B, and Fertilizer C. The title "cat-strip · highcharts · pyplots.ai" appears at
+    the top center. The x-axis shows "Treatment Group" with four category labels,
+    and the y-axis shows "Plant Height (cm)" ranging from approximately 7-42 cm. Each
+    category has its own color: Control (blue #306998), Fertilizer A (yellow #FFD43B),
+    Fertilizer B (purple #9467BD), and Fertilizer C (cyan #17BECF). Points are jittered
+    horizontally within each category to prevent overlap. A legend appears in the
+    top-right corner with a white background and border. The plot has subtle gray
+    gridlines and a clean white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; font sizes are appropriate
+          for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; jitter prevents point overlap effectively
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (radius 14) with good visibility; slight deduction
+          as some points in dense areas could benefit from transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, cyan - no red-green
+          conflicts)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot area is well-proportioned with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Plant Height (cm)" and "Treatment
+          Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but legend placement in top-right overlaps
+          the data area for Fertilizer B/C categories
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip/scatter plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, numeric values on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, categories clearly separated
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range (7-42 cm)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match category names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-strip · highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: Control (low, moderate spread), Fertilizer A
+          (moderate, tight), Fertilizer B (high with outliers at 38, 40, and low at
+          12), Fertilizer C (bimodal distribution ~20 and ~30)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth experiment with fertilizer treatments is a classic,
+          neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Plant heights 10-40 cm are realistic for seedling/small plant measurements
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code has functions/loops but is relatively simple; the loop used
+          for series creation is acceptable
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also saves plot.html (correct for interactive
+          library)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts built-in jitter feature which is good; could leverage
+          more interactive features like tooltips with custom formatting
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/letsplot.yaml b/plots/cat-strip/metadata/letsplot.yaml
index c40bcebe2c..03e3d0c945 100644
--- a/plots/cat-strip/metadata/letsplot.yaml
+++ b/plots/cat-strip/metadata/letsplot.yaml
@@ -24,3 +24,178 @@ review:
   - Could leverage lets-plot interactive tooltip features for enhanced data exploration
   - Grid lines only on y-axis (x grid disabled) which is appropriate but minor grid
     could add depth
+  image_description: 'The plot displays a categorical strip plot showing plant growth
+    measurements (in cm) across five fertilizer treatments: Control, Nitrogen, Phosphorus,
+    Potassium, and Complete. Each category has approximately 25 data points displayed
+    with horizontal jitter to prevent overlap. The colors are distinct for each category:
+    blue for Control, yellow/gold for Nitrogen, green for Phosphorus, purple for Potassium,
+    and red/coral for Complete. The y-axis ranges from about 8 to 48 cm. The Control
+    group shows the lowest values (roughly 9-20 cm), while the Complete treatment
+    shows the highest values with greater spread (roughly 12-48 cm), including visible
+    outliers. The title "cat-strip · letsplot · pyplots.ai" appears at the top. Axis
+    labels are clear: "Fertilizer Treatment" on x-axis and "Plant Growth (cm)" on
+    y-axis. The background is clean with subtle horizontal grid lines. No legend is
+    shown since categories are labeled on the x-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with good alpha for the data density (125 points),
+          jitter effectively separates points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinct but green/yellow could be challenging for some
+          colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units ("Plant Growth (cm)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend correctly hidden since x-axis
+          shows categories
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numeric values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, distribution shown per
+          category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, y-axis range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden (categories on x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-strip · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions across categories, includes outliers
+          in Complete group, demonstrates variance differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth with fertilizer treatments is a realistic, neutral scientific
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for plant growth (8-48 cm), though the outlier
+          at 48cm is quite high
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly but doesn't leverage lets-plot specific
+          features like tooltips or interactivity in a distinctive way
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/matplotlib.yaml b/plots/cat-strip/metadata/matplotlib.yaml
index 4a4bda40b4..f524392be3 100644
--- a/plots/cat-strip/metadata/matplotlib.yaml
+++ b/plots/cat-strip/metadata/matplotlib.yaml
@@ -24,3 +24,179 @@ review:
     (labels in scatter calls)
   - No distinctive matplotlib features used beyond basic scatter - could leverage
     ax.errorbar() for confidence intervals or custom marker styles
+  image_description: The plot displays a categorical strip plot showing quality scores
+    (y-axis, 0-100 scale) for five manufacturing plants (Plant A through Plant E on
+    x-axis). Each plant's data points are displayed as colored circles with horizontal
+    jitter to prevent overlap. Plant A (blue, ~80-93 range), Plant B (yellow/gold,
+    wide spread ~55-95 with visible outliers), Plant C (teal/cyan, tight cluster ~82-95),
+    Plant D (orange, widest spread ~40-88 with low outliers), and Plant E (olive green,
+    moderate spread ~68-95). The title reads "cat-strip · matplotlib · pyplots.ai".
+    A subtle gray dashed grid is visible on the y-axis. Points have white edge outlines
+    and good transparency (alpha ~0.7).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, jitter effectively separates data points
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized at s=150 with alpha=0.7, appropriate for ~27 points
+          per category. Minor deduction as slightly smaller markers might reduce minimal
+          point overlap in dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors (blue, yellow, teal, orange, olive) are distinguishable
+          and reasonably colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+          correctly
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Manufacturing Plant" and "Quality
+          Score (0-100)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Y-axis grid is subtle (alpha=0.3); scatter calls pass label, but
+          no legend is visible in the plot because ax.legend() is never called
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with individual points per category
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, numeric values on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual observations visible, multiple categories
+          shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis limits (30-105) properly show all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Categories are labeled via x-tick labels (legend not strictly needed
+          as colors match tick positions)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-strip · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions (tight vs wide spread), outliers (Plant
+          B: 55, 95; Plant D: 40, 45), different central tendencies'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Manufacturing quality control scenario is neutral, professional,
+          and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Quality scores 0-100 make sense, distributions centered around 70-90
+          are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API with ax methods
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic ax.scatter() which is standard; could have used ax.violinplot()
+          overlay, custom markers, or matplotlib-specific styling like zorder manipulation
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/plotly.yaml b/plots/cat-strip/metadata/plotly.yaml
index a526ebb842..798c19b20e 100644
--- a/plots/cat-strip/metadata/plotly.yaml
+++ b/plots/cat-strip/metadata/plotly.yaml
@@ -26,3 +26,174 @@ review:
     - consider removing legend or using it for a different variable
   - Grid alpha at 0.1 is perhaps too subtle - 0.2-0.3 would be more visible while
     still unobtrusive
+  image_description: 'The plot displays a categorical strip plot showing plant height
+    measurements (in cm) across five soil types: Clay, Sandy, Loam, Peat, and Chalk.
+    Each category has its own distinct color: Clay in Python Blue (#306998), Sandy
+    in Python Yellow (#FFD43B), Loam in coral red (#E15759), Peat in teal (#76B7B2),
+    and Chalk in green (#59A14F). Individual data points are scattered vertically
+    within each category with horizontal jitter applied to prevent overlap. The title
+    "cat-strip · plotly · pyplots.ai" is centered at the top. The y-axis shows "Plant
+    Height (cm)" ranging from ~10 to ~80, and the x-axis shows "Soil Type" with the
+    five category labels. A legend in the upper right identifies each soil type by
+    color. The background is clean white with subtle horizontal gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, jitter effectively separates data points
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size 14 with 0.7 opacity work well for ~25-28 points per
+          category; slightly large but visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with good contrast; no red-green confusion
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, minor extra margin space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Plant Height (cm)" and "Soil Type"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (rgba 0.1 alpha), but legend duplicates x-axis labels
+          which is redundant
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numeric values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, distributions shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match category names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "cat-strip · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (means, spreads), includes outliers
+          for Sandy and Loam; excellent variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth by soil type is a real, neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Plant heights 10-80cm are realistic for many plant species
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with custom hover templates which is good, but could
+          leverage px.strip() for more idiomatic plotly usage; interactivity via HTML
+          is included
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/plotnine.yaml b/plots/cat-strip/metadata/plotnine.yaml
index e7f9ae1cf5..81957b67b6 100644
--- a/plots/cat-strip/metadata/plotnine.yaml
+++ b/plots/cat-strip/metadata/plotnine.yaml
@@ -27,3 +27,175 @@ review:
   - Axis label Performance Score lacks units (e.g., Performance Score (%) or similar)
   - Some performance scores exceed 100 which may seem unrealistic for a typical percentage-based
     score
+  image_description: The plot displays a categorical strip plot with four product
+    categories (Product A, B, C, D) on the x-axis and Performance Score (ranging from
+    ~45 to ~115) on the y-axis. Points are jittered horizontally to reduce overlap.
+    Colors alternate between blue (#306998) for Products A and C, and yellow (#FFD43B)
+    for Products B and D. The title "cat-strip · plotnine · pyplots.ai" is at the
+    top. The plot uses a minimal theme with subtle gray grid lines. Each category
+    shows approximately 30 data points with different distributions - Product A has
+    a tight cluster around 70-80, Product B shows more spread with higher values,
+    Product C has a tight cluster around 65-75, and Product D shows the widest spread
+    from ~45 to ~115.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, axis labels are well-sized, tick labels
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with good alpha, though could be slightly larger
+          for this point count
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow are distinguishable, but the alternating pattern
+          doesn't serve a clear purpose
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, well-proportioned margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (Performance Score could have units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, legend correctly hidden since colors
+          don't add information
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numeric values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, multiple categories shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden (colors are redundant with x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-strip · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (tight vs spread), different means,
+          but could show more obvious outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Comparing product performance scores is a plausible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Performance scores in the 50-115 range are reasonable, though some
+          values slightly exceed 100
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses geom_jitter which is standard ggplot grammar, but doesn't leverage
+          plotnine-specific features like faceting, statistical transformations, or
+          more sophisticated scales
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/pygal.yaml b/plots/cat-strip/metadata/pygal.yaml
index 0fb652ad0e..a7e82ca262 100644
--- a/plots/cat-strip/metadata/pygal.yaml
+++ b/plots/cat-strip/metadata/pygal.yaml
@@ -23,3 +23,180 @@ review:
   - Axis labels lack units (could be "Performance Score (%)" if percentages)
   - Grid could be slightly more subtle (current alpha via style is acceptable but
     could improve)
+  image_description: 'The plot displays a categorical strip plot showing performance
+    scores (y-axis, ranging from 40-100) across four departments (x-axis: Sales, Engineering,
+    Marketing, Support). Each department''s data points are shown as colored dots
+    with horizontal jitter to prevent overlap: Sales in blue (#306998), Engineering
+    in yellow (#FFD43B), Marketing in green (#4CAF50), and Support in pink (#E91E63).
+    The title "cat-strip · pygal · pyplots.ai" appears at the top. The plot has a
+    clean white background with subtle horizontal grid lines. A legend at the bottom
+    identifies each department by color. The y-axis is labeled "Performance Score"
+    and x-axis is labeled "Department". Each category has approximately 25 data points
+    showing varied distributions - Marketing shows the widest spread including an
+    outlier around 40, while Engineering shows a tighter cluster around 80-90.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable at full size, good font sizing
+          for the canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, jitter effectively separates points
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots are well-sized (dots_size=12), visible but could be slightly
+          larger for 100 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that work for colorblind users (blue, yellow,
+          green, pink)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Performance Score", "Department") but no
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend at bottom is well-placed but slightly far
+          from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot with jittered points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numeric values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, distributions shown per
+          category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (35-105) shows all data points including outlier at
+          40
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four departments
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "cat-strip · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions per category, includes outlier in Marketing,
+          varied spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Comparing departmental performance scores is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 40-100 are realistic, though clipping to a minimum of 40
+          is slightly artificial
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern pygal API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but path should be verified
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart, custom Style, legend_at_bottom, x_value_formatter
+          for category labels. Good use of pygal but no advanced SVG interactivity
+          or animations.
+  verdict: APPROVED
diff --git a/plots/cat-strip/metadata/seaborn.yaml b/plots/cat-strip/metadata/seaborn.yaml
index ea47b0479b..7a4a4c85d9 100644
--- a/plots/cat-strip/metadata/seaborn.yaml
+++ b/plots/cat-strip/metadata/seaborn.yaml
@@ -24,3 +24,171 @@ review:
     or violin for statistical summary)
   - Color palette includes two blue shades (Plant A and Plant C) that may be confused
     at a glance
+  image_description: 'The plot displays a categorical strip plot with 5 manufacturing
+    plants (Plant A-E) on the x-axis and Quality Score (%) on the y-axis (range ~35-100).
+    Each plant has ~25 data points shown as colored circles with horizontal jitter.
+    Colors used: Plant A (blue), Plant B (yellow), Plant C (light blue), Plant D (gray),
+    Plant E (brown/orange). Different distributions are visible - Plant C shows tight
+    clustering around 90, Plant D shows high variability with outliers down to ~40,
+    and other plants show intermediate patterns. Title format is correct: "cat-strip
+    · seaborn · pyplots.ai". Subtle horizontal grid lines are present. All text is
+    clearly legible.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, jitter prevents point overlap effectively
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (size=12) with alpha=0.7 for 125 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Distinct colors but includes two blue shades that could be confused
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Quality Score (%)" and "Manufacturing Plant"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), no legend needed since colors match x-axis
+          labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical strip plot using sns.stripplot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, numeric values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, individual points visible, categorical comparison
+          shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range with appropriate limits (35-105)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, colors match x-axis categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "cat-strip · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions: tight (Plant C), variable (Plant D),
+          outliers visible'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Manufacturing quality control is plausible, though somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Quality scores 40-100% are realistic for manufacturing
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic stripplot without additional seaborn features like combining
+          with boxplot/violin, using statistical estimation, or dodge for grouped
+          data
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/bokeh.yaml b/plots/chernoff-basic/metadata/bokeh.yaml
index 9f71dc84fe..83a463c41e 100644
--- a/plots/chernoff-basic/metadata/bokeh.yaml
+++ b/plots/chernoff-basic/metadata/bokeh.yaml
@@ -23,3 +23,189 @@ review:
   weaknesses:
   - Legend positioned too far from the faces (at very top of plot)
   - Faces could be slightly larger to better utilize the canvas space
+  image_description: 'The plot displays 12 Chernoff faces arranged in a 4x3 grid on
+    a light gray background. The title "chernoff-basic · bokeh · pyplots.ai" appears
+    at the top in black text. A legend at the top shows three colored circles representing
+    sectors: Tech (blue), Retail (yellow), and Energy (brown/tan). The top row contains
+    4 Tech company faces (blue, wider faces with smiling expressions indicating higher
+    metrics). The middle row shows 4 Retail company faces (yellow, more circular with
+    neutral expressions). The bottom row displays 4 Energy company faces (brown/tan,
+    narrower with frowning expressions indicating lower metrics). Each face is labeled
+    below (e.g., "Tech #1", "Retail #1"). A subtitle at the bottom explains the feature
+    mapping. The faces have clearly visible features: eyebrows, eyes with white sclera
+    and dark pupils, a simple nose line, and curved mouths.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt is excellent, labels at 20pt are readable, subtitle
+          at 22pt is clear. Minor deduction for slightly small subtitle relative to
+          canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated from faces
+          and each other.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Faces are well-sized for the grid, facial features (eyes, mouth,
+          eyebrows) are clearly distinguishable.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and brown are distinguishable for most colorblind users,
+          though the brown/tan is somewhat muted.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with faces well-distributed. Slight imbalance
+          with more whitespace at bottom than top.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type; subtitle explains feature mappings clearly.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is present and clear, but positioned quite far from faces
+          at the very top.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements Chernoff faces with facial features mapped to
+          variables.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Four variables correctly mapped: face width (revenue growth), face
+          height (profit margin), eye size (satisfaction), mouth curve (market share).'
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has face outline, eyes with pupils, eyebrows, nose, and mouth. Grid
+          layout implemented. Minor: could have more facial features for additional
+          variables.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data properly normalized and displayed.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Sector legend correctly identifies Tech, Retail, and Energy with
+          matching colors.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "chernoff-basic · bokeh · pyplots.ai" with middle
+          dots.
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows clear variation between sectors: Tech faces are wider with
+          smiles (high growth/market share), Retail faces are medium-sized with neutral
+          expressions, Energy faces are narrower with frowns (low growth). Good variety
+          within sectors too.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Excellent business context comparing company performance metrics
+          across three industry sectors. Plausible metric ranges.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All metrics normalized to 0-1 range as spec requires, with realistic
+          sector-specific distributions.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally follows KISS but has a complex inline loop. Acceptable for
+          this complex plot type.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data generation.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh.io, bokeh.models, bokeh.plotting).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Correctly saves as plot.png using export_png.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource for hover tooltips, HoverTool for interactivity
+          (though not visible in PNG), Label objects for text positioning. Uses patch()
+          for drawing ellipses which is idiomatic Bokeh.
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/highcharts.yaml b/plots/chernoff-basic/metadata/highcharts.yaml
index 9781905e15..8ed4f05db4 100644
--- a/plots/chernoff-basic/metadata/highcharts.yaml
+++ b/plots/chernoff-basic/metadata/highcharts.yaml
@@ -23,3 +23,164 @@ review:
   - Uses helper function create_face_svg() instead of flat KISS structure
   - Not technically using Highcharts library - implements pure SVG instead (understandable
     since Highcharts lacks native Chernoff face support)
+  image_description: 'The plot displays 9 Chernoff faces arranged in a 3×3 grid, representing
+    samples from the Iris dataset. Three species are color-coded: Setosa (blue), Versicolor
+    (yellow), and Virginica (purple) - each with 3 samples. The faces feature varying
+    facial characteristics: face width maps to sepal length, eye size to sepal width,
+    mouth curvature to petal length, and eyebrow slant to petal width. The title "chernoff-basic
+    · highcharts · pyplots.ai" appears at the top with a descriptive subtitle. Two
+    legend boxes on the right side clearly explain the species colors and feature
+    mappings. All faces are well-rendered with visible differences between species.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, labels, and legend text are all clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; faces and labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Faces are well-sized and distinct; facial features (eyes, mouth,
+          eyebrows) are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, purple) with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas with faces in a balanced 3×3 grid and legends
+          positioned appropriately
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Two well-designed legends explaining species and feature mappings
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Chernoff faces visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 4 Iris variables correctly mapped to facial features
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: grid layout, color by group, feature
+          mapping, normalization'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data properly represented across the faces
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Both species legend and feature mapping legend are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across species with different facial expressions,
+          though within-species variation is subtle
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses Iris dataset, a classic real-world scientific dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Data is properly normalized to 0-1 range as recommended
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses functions (`create_face_svg`) instead of flat structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Does not use Highcharts library but implements custom SVG; acceptable
+          as Highcharts doesn't have native Chernoff face support
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/letsplot.yaml b/plots/chernoff-basic/metadata/letsplot.yaml
index 2ec7496f26..312063b5d3 100644
--- a/plots/chernoff-basic/metadata/letsplot.yaml
+++ b/plots/chernoff-basic/metadata/letsplot.yaml
@@ -28,3 +28,165 @@ review:
   - Title uses lets-plot with hyphen instead of letsplot (single word)
   - Legend is somewhat small and isolated on the right side
   - Nose does not vary based on any data variable (static for all faces)
+  image_description: 'The plot displays 12 Chernoff faces arranged in a 3×4 grid layout.
+    Each row represents a different iris species: Setosa (top row, blue faces), Versicolor
+    (middle row, yellow faces), and Virginica (bottom row, red/pink faces). Each face
+    has distinct facial features including eyes with pupils, eyebrows, a nose, and
+    a mouth. The faces show variation in face width, eye size, mouth curvature (happy/sad),
+    and eyebrow slant across different samples. Setosa faces appear "happier" with
+    upward-curved mouths, while Virginica faces appear "sadder" with downward-curved
+    mouths. Each face is labeled with its species name below. A legend on the right
+    shows the three species colors. The title reads "Iris Species Comparison · chernoff-basic
+    · lets-plot · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and readable, species labels are bold and clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, faces well-spaced in grid
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Faces are well-sized and visible, facial features clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, red palette distinguishable but not ideal for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent grid layout utilizing canvas well
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is small and positioned far right, somewhat isolated
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Chernoff faces visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to facial features (face width, eye size,
+          mouth curvature, eyebrow slant)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has grid layout, color by species, labels; missing nose length variation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data properly normalized and displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows species colors
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses "lets-plot" instead of "letsplot" in title
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across species and within species, demonstrates face
+          differences effectively
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses real Iris dataset, a classic multivariate dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized values (0-1) as recommended in spec
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses functions (create_face), violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 2
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (bonus for HTML)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Effectively uses ggplot grammar with geom_polygon, geom_path, geom_text,
+          scale_fill_manual, comprehensive theme customization
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/matplotlib.yaml b/plots/chernoff-basic/metadata/matplotlib.yaml
index 151aaa9ce7..68c4b05a18 100644
--- a/plots/chernoff-basic/metadata/matplotlib.yaml
+++ b/plots/chernoff-basic/metadata/matplotlib.yaml
@@ -30,3 +30,180 @@ review:
     multivariate nature of the visualization
   - Could potentially add more facial features (e.g., ear size, hair) to map additional
     variables as mentioned in the spec
+  image_description: 'The plot displays a 3x3 grid of Chernoff faces representing
+    car ratings. Each face is colored by category: blue for Economy (top row), yellow
+    for Sports (middle row), and green for Luxury (bottom row). The faces have elliptical
+    shapes with eyes containing white sclera and black pupils, eyebrows, a simple
+    vertical nose with a base, and curved mouths. A category legend appears in the
+    upper right corner, and a feature mapping legend in the bottom left explains the
+    data-to-face mappings. Each face is labeled (Economy 1-3, Sports 1-3, Luxury 1-3).
+    The title "Car Ratings · chernoff-basic · matplotlib · pyplots.ai" is displayed
+    at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt is clear, labels at 13pt are readable, legend text
+          appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text and face elements are well separated with no overlapping
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Faces are appropriately sized, facial features (eyes, nose, mouth)
+          are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green are colorblind-friendly and easily distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of square format for the 3x3 grid, but there is some wasted
+          space at the very top above the first row of faces
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for this plot type (no axes), but appropriately turned off
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Both legends (category and feature mapping) are well-placed and informative
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Chernoff faces visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 4 variables correctly mapped to facial features (width, height, eye
+          size, mouth curve)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: grid layout, color by category, feature
+          mapping legend, multiple facial features'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All faces fully visible within canvas
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Both legends are accurate and informative
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Car Ratings · chernoff-basic · matplotlib
+          · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variation across categories: Economy has wider faces (high
+          efficiency), Sports has taller faces (high power), Luxury has larger eyes
+          (reliability) and happier mouths (comfort). However, the variation within
+          categories could be more pronounced.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Car ratings is a neutral, realistic scenario appropriate for comparing
+          multivariate data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are normalized 0-1 as specified, but some category-specific
+          traits could show more dramatic differences
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: matplotlib.patches, matplotlib.pyplot, numpy'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib.patches effectively for drawing faces, but this is
+          a custom implementation rather than leveraging any specialized matplotlib
+          features. The manual drawing with Ellipse, Circle, and plot is competent
+          but basic.
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/plotly.yaml b/plots/chernoff-basic/metadata/plotly.yaml
index 3fcb6ecccd..59ac41b00e 100644
--- a/plots/chernoff-basic/metadata/plotly.yaml
+++ b/plots/chernoff-basic/metadata/plotly.yaml
@@ -26,3 +26,175 @@ review:
     very similar within each row making it hard to see individual differences
   - Title includes Iris Dataset prefix which deviates slightly from the pure spec-id
     format
+  image_description: 'The plot displays a 3x5 grid of 15 Chernoff faces representing
+    the Iris dataset. Each row corresponds to a species (Setosa in blue, Versicolor
+    in yellow, Virginica in green), with 5 samples per species. Each face has elliptical
+    shape with eyes (white with black pupils), eyebrows, a triangular nose, and a
+    curved mouth. The title reads "Iris Dataset · chernoff-basic · plotly · pyplots.ai"
+    at the top. A species legend is positioned in the upper right, and a "Feature
+    Mapping" box on the right side explains: Face Width → Sepal Length, Face Height
+    → Sepal Width, Eye Size → Petal Length, Smile → Petal Width. Row labels (Setosa,
+    Versicolor, Virginica) are on the left, and column labels (Sample 1-5) are at
+    the top of each column.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, row/column labels readable, feature mapping
+          text is legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, faces well-spaced in grid
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Faces are clearly visible with good sizing; facial features (eyes,
+          mouth, eyebrows) are distinguishable. Minor: Setosa faces within species
+          show limited variation'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green palette is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good grid layout, minor: slightly more whitespace at bottom than
+          top'
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Chernoff faces (no traditional axes), but row/column labels
+          present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Species legend well-placed, feature mapping legend excellent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Chernoff faces visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 4 iris features mapped to face width, height, eye size, mouth curvature
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid layout, color by species, feature mapping legend all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Data normalized 0-1 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species legend and feature mapping are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format (includes spec-id, library, pyplots.ai)
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation across species, but within-species variation (especially
+          Setosa) could be more pronounced
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, well-known multivariate dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Data properly normalized, realistic flower measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, but code is somewhat
+          long/complex due to face drawing
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, plotly, sklearn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Plotly's shape system for custom graphics and creates interactive
+          HTML output; could leverage more Plotly interactivity features
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/plotnine.yaml b/plots/chernoff-basic/metadata/plotnine.yaml
index 523231c75f..985edf2333 100644
--- a/plots/chernoff-basic/metadata/plotnine.yaml
+++ b/plots/chernoff-basic/metadata/plotnine.yaml
@@ -26,3 +26,181 @@ review:
   - Helper functions violate KISS principle; code should be linear without function
     definitions
   - The width/height parameters in plot.save() are redundant with figure_size in theme
+  image_description: 'The plot displays 6 Chernoff faces arranged in a 3x2 grid layout
+    representing car performance metrics. Each face is labeled with car names: Compact
+    A, Compact B, SUV A (top row) and SUV B, Sedan A, Sedan B (bottom row). Faces
+    are color-coded by category: dark blue for Compact, lighter blue for SUV, and
+    yellow for Sedan. The faces show varying widths, heights, eye sizes, and mouth
+    curvatures corresponding to engine power, fuel efficiency, safety rating, and
+    comfort score respectively. The title "chernoff-basic · plotnine · pyplots.ai"
+    appears at the top with a descriptive subtitle. A legend at the bottom indicates
+    Category colors. Compact cars have narrower/taller faces with frowning mouths
+    (lower comfort), while Sedans have wider faces with smiling mouths (higher comfort).
+    The visualization effectively uses white background with void theme.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, subtitle at 16pt, strip text at 14pt bold, legend
+          text at 14-16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; faces well-separated in facet panels
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Faces are appropriately sized with clear facial features; pupils
+          could be slightly larger for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe; good contrast between
+          categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 canvas with well-proportioned grid layout and
+          centered legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Chernoff faces (using theme_void which is appropriate)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, appropriate for categorical color mapping
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Chernoff faces implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Variables correctly mapped: engine_power→face width, fuel_efficiency→face
+          height, safety_rating→eye size, comfort_score→mouth curvature'
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: facial features, normalization, grid
+          layout, color by category'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All faces visible with appropriate scaling
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Category with accurate colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "chernoff-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in all mapped features; good contrast between car
+          categories; could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Car performance metrics is a realistic, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic values (120-280 HP, 18-35 MPG, 4.2-4.8 safety, 3.5-4.5
+          comfort)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper functions (make_ellipse, make_eye, make_mouth, make_eyebrow,
+          normalize_column) which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses plot.save() correctly but save call uses width/height instead
+          of relying on figure_size in theme
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (geom_polygon, geom_path, geom_point,
+          facet_wrap, theme_void, coord_fixed), but Chernoff faces require manual
+          geometry construction which doesn't showcase plotnine's statistical/declarative
+          strengths
+  verdict: APPROVED
diff --git a/plots/chernoff-basic/metadata/pygal.yaml b/plots/chernoff-basic/metadata/pygal.yaml
index 08ad326a5e..5c9a165443 100644
--- a/plots/chernoff-basic/metadata/pygal.yaml
+++ b/plots/chernoff-basic/metadata/pygal.yaml
@@ -26,3 +26,182 @@ review:
     each face
   - Some unused vertical space at the bottom of the canvas could be better utilized
   - Faces could be slightly larger to better fill the available canvas space
+  image_description: The plot displays 5 Chernoff faces arranged horizontally representing
+    car performance metrics (Sedan A, SUV B, Sports C, Compact D, Luxury E). Each
+    face is rendered as a colored ellipse (blue, yellow, teal, coral, purple respectively)
+    with semi-transparent fill and solid outline. Faces contain white eyes with dark
+    pupils, eyebrows with varied slants, vertical nose lines of different lengths,
+    and curved mouths (smiling/frowning). Below each face is a bold label with the
+    car name. The title "Car Performance Comparison · chernoff-basic · pygal · pyplots.ai"
+    appears at top center. A feature mappings legend explains the 7 attribute mappings
+    (Face Width = Engine Power, etc.) arranged in two rows. A color legend at bottom
+    identifies each car by its color swatch. The layout uses a white background with
+    good spacing between elements.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, labels, and legend text are all clearly readable at full size
+          with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; faces are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Faces are well-sized and visible; facial features clearly distinguishable,
+          though faces could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with distinct hues (blue, yellow, teal, coral,
+          purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good horizontal distribution; legend area well organized; some unused
+          vertical space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Chernoff faces; feature mappings serve as axis explanation
+          - full marks
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Feature mappings legend is informative; color legend is helpful but
+          could be more integrated
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Chernoff faces implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 7 variables correctly mapped to facial features
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: face width/height, eye size/spacing,
+          mouth curve, nose length, eyebrow slant'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Data normalized 0-1 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Color legend duplicates the face labels which are already below each
+          face; could be more useful
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Car Performance Comparison · chernoff-basic · pygal
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied facial expressions (happy/serious), different face sizes,
+          varied eye sizes; good differentiation between cars
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Car performance metrics is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values normalized 0-1 as appropriate; mappings to car metrics are
+          sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses inline code without functions, but the SVG manipulation is complex
+          by necessity
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern pygal and ET usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png, plot.svg, and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal for SVG infrastructure and custom style, then extends
+          with custom SVG elements for Chernoff faces. Creative approach but pygal
+          itself cannot render Chernoff faces natively, so this is a hybrid solution.
+  verdict: APPROVED
diff --git a/plots/chord-basic/metadata/altair.yaml b/plots/chord-basic/metadata/altair.yaml
index 0a156fee9d..1c8c07ca79 100644
--- a/plots/chord-basic/metadata/altair.yaml
+++ b/plots/chord-basic/metadata/altair.yaml
@@ -27,3 +27,171 @@ review:
     geometry calculations which reduces the declarative advantage of Altair
   - Labels could benefit from slight rotation for better alignment with arc positions
   - Flow values lack units in the data context
+  image_description: 'The plot displays a chord diagram showing migration flows between
+    6 continents. The outer ring consists of colored arc segments: Europe (dark blue
+    #306998), North America (golden yellow #FFD43B), Asia (teal #4ECDC4), Africa (coral
+    red #FF6B6B), South America (light green/mint #95E1D3), and Oceania (purple #A86EDB).
+    Each continent''s arc size is proportional to its total flow volume. Semi-transparent
+    chords connect the continents with widths corresponding to flow magnitudes. Labels
+    are positioned outside each arc segment with matching colors. The title "chord-basic
+    · altair · pyplots.ai" is displayed at the top center. A legend is present on
+    the right side showing the Region color mapping.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clear and readable, font sizes are appropriate
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Chords are visible with good opacity (0.6), arc segments clearly
+          defined
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct color palette, colorblind-friendly with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-centered diagram, good use of canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for chord diagram (no traditional axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right, clean styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct chord diagram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target/value correctly mapped to arcs and chords
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: bidirectional flows, proportional chord widths,
+          distinct colors'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All entities and connections visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all regions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: chord-basic · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bidirectional flows, varying magnitudes, 6 entities with 15
+          connections
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Migration flows between continents is a perfect real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Flow values (12-55) are plausible but abstract (no units specified)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random, uses fixed dataset)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Missing explicit png check
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative grammar with layering, tooltips enabled
+          for interactivity, HTML export. However, chord diagrams are not a native
+          Altair chart type, requiring manual construction.
+  verdict: APPROVED
diff --git a/plots/chord-basic/metadata/bokeh.yaml b/plots/chord-basic/metadata/bokeh.yaml
index cb638a0fcd..134d9f43d5 100644
--- a/plots/chord-basic/metadata/bokeh.yaml
+++ b/plots/chord-basic/metadata/bokeh.yaml
@@ -25,3 +25,171 @@ review:
   - Contains helper function bezier_chord() which violates KISS structure
   - Bidirectional flows are combined rather than shown as separate chords (spec mentions
     both directions should be visible separately)
+  image_description: 'The plot displays a chord diagram visualizing migration flows
+    between 6 continents arranged in a circle. Each continent is represented by a
+    colored arc segment: Africa (blue #306998), Asia (yellow #FFD43B), Europe (orange
+    #E69F00), N. America (light blue #56B4E9), S. America (green #009E73), and Oceania
+    (pink #CC79A7). Bezier-curved chords connect the entities with widths proportional
+    to flow magnitude. The title "chord-basic · bokeh · pyplots.ai" appears at the
+    top center. A legend titled "Migration Flows" is positioned on the right side
+    with colored boxes for each continent. Entity labels are placed outside their
+    respective arcs in matching colors. The Bokeh interactive toolbar (hover, pan,
+    wheel_zoom, reset) is visible in the top-right corner. The plot uses a square
+    3600×3600 format.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 36pt, labels at 26pt, legend at 20-22pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels well-positioned outside arcs
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Chords clearly visible with good alpha (0.55), arc segments distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with good differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, chord diagram well-centered with legend on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for chord diagrams (no traditional axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend well-placed and clear
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct chord diagram with arcs and bezier curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target entities correctly mapped around perimeter
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Chord width proportional to flow, distinct colors, hover tooltips
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 6 continents visible with all connections shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all entities with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: chord-basic · bokeh · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bidirectional flows combined, varying flow magnitudes, but
+          individual bidirectional flows not separately visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Migration between continents is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Flow values in millions are sensible, though some values could show
+          more variation
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains helper function `bezier_chord()` which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` (though data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: HoverTool with custom tooltips, ColumnDataSource, patches glyph,
+          interactive tools
+  verdict: APPROVED
diff --git a/plots/chord-basic/metadata/highcharts.yaml b/plots/chord-basic/metadata/highcharts.yaml
index 0c83b7d906..9f07e933c2 100644
--- a/plots/chord-basic/metadata/highcharts.yaml
+++ b/plots/chord-basic/metadata/highcharts.yaml
@@ -21,3 +21,179 @@ review:
   weaknesses:
   - Europe label is missing from the visible portion of the diagram - label may be
     positioned outside visible area or obscured
+  image_description: 'The plot displays a chord/dependency wheel diagram showing migration
+    flows between 6 continents. The diagram is circular with entities arranged around
+    the perimeter: Oceania (orange) at the top, Europe (blue, arc visible but label
+    missing), South America (green) on the left, Africa (purple) on the lower-left,
+    Asia (yellow) at the bottom, and North America (cyan) on the right. Colored ribbons/chords
+    connect the continents, with ribbon width proportional to migration flow magnitude.
+    The title "chord-basic · highcharts · pyplots.ai" appears at the top with a subtitle
+    "Migration Flows Between Continents (Millions)". The background is white and all
+    visible labels are clearly readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, and continent labels are all clearly readable at
+          full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or visual elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Chords are well-sized with appropriate opacity (0.6), clearly showing
+          flow magnitudes
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan, green,
+          orange) - no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, diagram well-centered, minor issue with Europe
+          label not visible
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for chord diagrams, continent names serve as labels - all appropriately
+          descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately as nodes are labeled, but Europe label
+          missing reduces clarity
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct chord/dependency wheel diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target/value correctly mapped to chord connections
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Bidirectional flows visible, chord width proportional to flow, distinct
+          colors per entity
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 6 continents and 30 connections displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Node labels correctly identify each continent
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "chord-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: bidirectional flows, varying magnitudes, all
+          6 continents connected'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Migration flows between continents are a perfect real-world scenario
+          for chord diagrams
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Values in millions are plausible, though some flows seem high (e.g.,
+          55M Asia→North America)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed), but depends on external
+          JS downloads
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts dependency wheel which is native chord diagram support,
+          includes tooltips configuration, but could leverage more interactive features
+          in HTML output
+  verdict: APPROVED
diff --git a/plots/chord-basic/metadata/letsplot.yaml b/plots/chord-basic/metadata/letsplot.yaml
index 643e06e068..07451f23f3 100644
--- a/plots/chord-basic/metadata/letsplot.yaml
+++ b/plots/chord-basic/metadata/letsplot.yaml
@@ -24,3 +24,168 @@ review:
     format)
   - Chord colors only show source color; adding gradient or target indication would
     improve bidirectional flow clarity
+  image_description: The plot displays a chord diagram showing migration flows between
+    6 continents arranged in a circle. Asia (blue) is positioned at the top-right,
+    Europe (yellow) at top-left, Africa (green) at center-left, North America (red)
+    at bottom, South America (purple) at bottom-right, and Oceania (teal) at right.
+    Curved chords connect the continents with varying widths proportional to flow
+    magnitude. The thickest chords appear between Asia-Europe, Africa-Europe, and
+    South America-North America. Labels are positioned outside the ring in bold dark
+    text. A horizontal legend at the bottom identifies all continents. The title "Migration
+    Flows Between Continents · chord-basic · letsplot · pyplots.ai" appears at the
+    top. The background is white/clean.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, continent labels, and legend text are all clearly readable
+          at the output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels well-positioned outside the ring
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Chords are visible with good alpha (0.55), though some thin chords
+          are subtle
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Six distinct colors used; mostly colorblind-safe but red-green proximity
+          could be improved
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format perfect for circular diagram, good proportions
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well placed at bottom, no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct chord diagram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target/value correctly mapped to chords
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Chord width proportional to flow, distinct entity colors; bidirectional
+          flows shown but not distinctly colored at both ends
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All entities and connections visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all continents
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title present but "Migration Flows Between Continents" should ideally
+          be after the spec-id format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple bidirectional flows, varying magnitudes; could show
+          more extreme flow differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Migration between continents is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Flow values (8-45) are plausible relative magnitudes
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png but also plot.html (correct for letsplot)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar, geom_polygon, scale_fill_manual, coord_fixed;
+          could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/chord-basic/metadata/matplotlib.yaml b/plots/chord-basic/metadata/matplotlib.yaml
index 14ce7949d8..3bb948d94d 100644
--- a/plots/chord-basic/metadata/matplotlib.yaml
+++ b/plots/chord-basic/metadata/matplotlib.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/chord-basic/metadata/plotly.yaml b/plots/chord-basic/metadata/plotly.yaml
index 811a935c6d..b6c933b217 100644
--- a/plots/chord-basic/metadata/plotly.yaml
+++ b/plots/chord-basic/metadata/plotly.yaml
@@ -23,3 +23,169 @@ review:
   weaknesses:
   - Some smaller flow chords are difficult to distinguish due to overlapping
   - Interactive hover on individual chords would enhance the HTML version
+  image_description: 'The chord diagram displays migration flows between 6 continents
+    arranged in a circular layout. The outer ring shows colored arcs for each continent:
+    Africa (blue, #306998), Asia (yellow, #FFD43B), Europe (green, #2E8B57), N. America
+    (red, #DC143C), S. America (purple, #9370DB), and Oceania (orange, #FF8C00). Continent
+    labels are positioned around the perimeter with rotated text for readability.
+    Chords connect the continents with widths proportional to migration flow values,
+    using semi-transparent fills (opacity 0.6) to show overlapping flows. The title
+    "Migration Flows Between Continents · chord-basic · plotly · pyplots.ai" appears
+    at the top. A legend on the right identifies each continent. The layout is clean
+    with a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend are clear; perimeter labels are readable but some
+          rotation angles could be improved
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Chords are well-sized with good opacity; some thin flows harder to
+          see
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for all 6 continents, colorblind-friendly palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-centered circle with good margins and legend placement
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend well-positioned on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct chord diagram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target/value correctly mapped to arcs and chords
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, proportional widths,
+          bidirectional flows'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the circular layout
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 6 continents
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Migration Flows Between Continents · chord-basic
+          · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bidirectional flows with varying magnitudes; good variety in
+          flow sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Migration flows between continents are a realistic and comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible migration units; largest flows from Europe/Asia
+          align with reality
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) (though data is actually hardcoded matrix)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also plot.html (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with SVG paths for chords; could leverage hover interactivity
+          more in the HTML output
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/altair.yaml b/plots/choropleth-basic/metadata/altair.yaml
index 6435636e43..e38e59b83f 100644
--- a/plots/choropleth-basic/metadata/altair.yaml
+++ b/plots/choropleth-basic/metadata/altair.yaml
@@ -29,3 +29,177 @@ review:
     gray)
   - Could leverage Altair's interactive features (.interactive() for zoom/pan or selection
     highlights) to better showcase library strengths
+  image_description: The plot displays a choropleth map of the United States
+    showing county-level unemployment rates. The map uses a sequential blue color
+    scheme ranging from light blue (low unemployment ~0%) to dark blue (high unemployment
+    ~24%). The title reads "US County Unemployment · choropleth-basic · altair · pyplots.ai"
+    at the top. A vertical color legend on the right side labeled "Unemployment (%)"
+    shows the scale from 0.00 to 0.24. County boundaries are clearly visible with
+    thin white strokes. Notable regional patterns include higher unemployment (darker
+    blue) in parts of the Southwest (Arizona), Southern Texas, the Mississippi Delta
+    region, and Appalachia, with generally lower rates in the Upper Midwest and Plains
+    states. Alaska and Hawaii are included in the projection (bottom left). The map
+    fills most of the canvas with well-balanced whitespace.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt is clear, legend title at 20pt and labels at 16pt are
+          all readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Counties are well-sized, boundaries clearly visible with 0.3px white
+          stroke
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization, map fills ~70% of space, minor: legend
+          could be slightly closer'
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for choropleth maps (no axes), but legend title is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, appropriate gradient length
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Unemployment rate correctly mapped to color
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has color legend with value range, appropriate projection (Albers
+          USA), clear boundaries, tooltips. Minor: missing explicit handling of missing
+          data (shows as light/no fill)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-25% range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Unemployment (%)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{context} · {spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows regional variation well with clear patterns, shows both high
+          and low unemployment areas
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses real US unemployment data from vega_datasets, neutral topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0-25% are realistic for unemployment rates, though the domain
+          is set to 0.25 while the legend shows decimal values (0.24)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple import → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic vega_datasets data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and vega_datasets imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' and 'plot.html'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive Altair features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses topo_feature for geographic data, transform_lookup for joining
+          data, declarative encoding, tooltips. Good but could add interactivity (`.interactive()`)
+          or selection features.
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/bokeh.yaml b/plots/choropleth-basic/metadata/bokeh.yaml
index f6024e4270..99985f1299 100644
--- a/plots/choropleth-basic/metadata/bokeh.yaml
+++ b/plots/choropleth-basic/metadata/bokeh.yaml
@@ -25,3 +25,170 @@ review:
     the "US Population Density" prefix
   - Tile grid approach, while creative and readable, differs from traditional geographic
     projection that spec mentions (Robinson for world, Albers for US)
+  image_description: The plot displays a tile grid map of US population density using
+    rectangular tiles arranged to approximate the geographic positions of all 50 US
+    states plus DC. Each state is represented by a colored rectangle with a 2-letter
+    state abbreviation. The color scheme uses a sequential blue palette (Blues9) where
+    lighter blues indicate lower population density and darker blues indicate higher
+    density. States like NJ, RI, MA, CT, DE, MD, and FL appear in darker blue shades
+    (high density), while states like AK, WY, MT, ND, SD appear in very light blue
+    (low density). DC is shown in gray, demonstrating missing data handling. The title
+    reads "US Population Density · choropleth-basic · bokeh · pyplots.ai" at the top.
+    A color bar legend is positioned on the right side showing "Population Density
+    (per sq mile)" with a logarithmic scale ranging from approximately 1 to 1200.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, labels at 18pt are readable; slightly smaller than
+          ideal for state abbreviations
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; each state tile is separate with clear spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized and clearly visible with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues9 sequential palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; slight imbalance with color bar placement creating
+          some empty space in lower right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Color bar well-placed with clear title and tick labels
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map representation using tile grid approach
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Population density correctly mapped to color intensity
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has color legend, clear boundaries, missing data handling; tile grid
+          approach is creative but not traditional geographic projection
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of density values displayed from 1 (AK) to 1263 (NJ)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color bar accurately shows population density scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes extra "US Population Density" prefix; should be just
+          "choropleth-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows wide range of density values, missing data handling; but tile
+          grid loses some geographic context
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US population density is a real, neutral, educational topic with
+          accurate data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic population density values (1-1263 people per sq mile)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure but has some complexity with manual color
+          mapping logic
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random values
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Bokeh's ColumnDataSource, LogColorMapper, ColorBar,
+          LabelSet, and rect glyph
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/highcharts.yaml b/plots/choropleth-basic/metadata/highcharts.yaml
index bbccbf7170..06067cc7e1 100644
--- a/plots/choropleth-basic/metadata/highcharts.yaml
+++ b/plots/choropleth-basic/metadata/highcharts.yaml
@@ -24,3 +24,179 @@ review:
   - Some small country labels may be difficult to read (e.g., Monaco, Andorra, Luxembourg)
   - Legend could have more refined styling/spacing
   - Could benefit from hover tooltips being more prominent in static image context
+  image_description: The plot shows a choropleth map of Europe displaying population
+    density (people per km²). The title "choropleth-basic · highcharts · pyplots.ai"
+    appears at the top in bold black text, with a subtitle "Population Density (people
+    per km²)" below. Countries are colored using a sequential blue color palette ranging
+    from very light blue (low density ~0) to dark navy blue (high density ~550). The
+    Netherlands appears darkest blue (highest density), followed by Belgium and UK.
+    Scandinavian countries (Norway, Sweden, Finland) appear lightest indicating low
+    density. A vertical color legend on the right side shows the density scale from
+    0-550 per km². Country names are labeled directly on the map. Countries without
+    data appear in light gray. The map is well-proportioned and fills the canvas appropriately.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, legend, and country labels all readable. Some smaller
+          country labels are a bit small but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; labels are positioned well within or
+          near their countries.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Map regions clearly visible with good borders. Some very small countries
+          hard to distinguish.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue palette is colorblind-safe; no red-green issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, map fills most of the space. Legend positioned
+          well on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for maps, but subtitle includes units (per km²).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is clear and well-placed, but could benefit from better styling.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map implementation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Regions correctly colored by population density values.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has color legend, clear boundaries, handles missing data (gray).
+          Projection is appropriate.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Color scale shows full range 0-550.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows density scale with proper labeling.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: `choropleth-basic · highcharts · pyplots.ai`'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows good variation: high density (Netherlands 508), medium (Germany
+          234), low (Norway 15). Could include more extreme cases.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population density by European country is a realistic, neutral scenario
+          with plausible values.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for Europe (Netherlands ~500, Norway ~15). Good
+          range representation.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean script structure: imports → data → config → HTML generation
+          → screenshot.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually static/deterministic.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (json, tempfile, time, urllib, Path, numpy,
+          selenium).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts map API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves plot.png and plot.html correctly.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highmaps for geographic visualization, TopoJSON integration,
+          custom color stops, data labels, and hover states. Could use more advanced
+          features like drilldown.
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/letsplot.yaml b/plots/choropleth-basic/metadata/letsplot.yaml
index dec9d0ee9e..5ede7daeea 100644
--- a/plots/choropleth-basic/metadata/letsplot.yaml
+++ b/plots/choropleth-basic/metadata/letsplot.yaml
@@ -25,3 +25,171 @@ review:
     coverage recommended
   - Extreme outlier (Luxembourg 126.4k) compresses color scale making mid-range countries
     harder to distinguish
+  image_description: The plot displays a choropleth map of Europe showing GDP per
+    capita by country. Countries are shaded using a yellow-to-blue color gradient
+    (low=#FFD43B to high=#306998). Most Eastern European countries appear in yellow/golden
+    tones (lower GDP), while Nordic countries (Norway, Sweden, Finland) and Western
+    European nations (Netherlands, Belgium, Austria, Switzerland) show brownish-gray
+    tones (higher GDP). Luxembourg appears as a tiny blue spot (highest GDP at ~126k
+    USD). The map includes clear country boundaries with dark gray outlines. A vertical
+    color legend in the lower-right displays "GDP per Capita (thousands USD)" with
+    scale from 20 to 120. The title at the top reads "European GDP per Capita · choropleth-basic
+    · letsplot · pyplots.ai". The map uses a clean white background with no grid lines
+    or axis labels, which is appropriate for geographic visualization.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and readable (26pt), legend text is clear. Slightly
+          under ideal sizes but very legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Countries are well-sized and clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Yellow-to-blue gradient is colorblind-friendly, though the mid-tones
+          (brownish) could be slightly more distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Map fills canvas well, legend is appropriately positioned, good use
+          of coordinate limits to focus on Europe
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed but very small relative to the map size; the
+          legend bar could be larger and more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Country-to-GDP mapping correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes color legend with value range, visible boundaries, sequential
+          color palette
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values displayed correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled with units
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good range of GDP values (13.9k-126.4k), demonstrates geographic
+          patterns well. Missing data handling shown (some countries appear gray)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real-world scenario with plausible European GDP per capita values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic, but Luxembourg's 126.4k is an extreme outlier,
+          slightly skewing the color distribution
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → geocode → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic; geocoding service results
+          may vary
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses lets-plot's geocode_countries() and geom_map() effectively,
+          demonstrating the library's geographic capabilities
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/matplotlib.yaml b/plots/choropleth-basic/metadata/matplotlib.yaml
index ed11d37ec3..8942eab33c 100644
--- a/plots/choropleth-basic/metadata/matplotlib.yaml
+++ b/plots/choropleth-basic/metadata/matplotlib.yaml
@@ -27,3 +27,185 @@ review:
     4800×2700 display; 14-15pt would improve readability
   - Layout slightly unbalanced with extra whitespace on left side due to Alaska/Hawaii
     placement
+  image_description: The plot displays a tile grid map of the United States showing
+    population density per square mile. Each state is represented as a rounded square
+    tile arranged to approximate geographic positions. Colors use a sequential Blues
+    colormap ranging from very light blue (low density, e.g., Wyoming at 6) to dark
+    navy (high density, e.g., New Jersey at 1263). State abbreviations are displayed
+    in bold text centered on each tile - white text on darker backgrounds, blue text
+    on lighter backgrounds. Alaska and Hawaii are positioned at the bottom-left. The
+    District of Columbia tile shows gray with diagonal hatching to demonstrate missing
+    data handling. A vertical colorbar on the right shows the scale from 0 to 1200+
+    per sq mile. A "No data" legend appears in the lower left. The title "US Population
+    Density · choropleth-basic · matplotlib · pyplots.ai" appears at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt is excellent, colorbar label at 18pt is good, state
+          abbreviations at 13pt are readable but could be slightly larger for 4800×2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all state labels are clearly separated within
+          their tiles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tile sizes are well-proportioned, colors are clearly distinguishable,
+          spacing between tiles is effective
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though map could be slightly more centered
+          (extra space on left due to AK/HI placement)
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for map visualizations - no axes shown (appropriate for this
+          plot type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend for missing data is well-placed and clear, colorbar is appropriately
+          sized
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map implementation using tile grid approach
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Population density correctly mapped to color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: color legend with value range, region
+          boundaries visible, sequential color palette, missing data handling (DC
+          shown gray with hatching)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of values displayed from Alaska (1) to New Jersey (1263)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Population Density (per sq mile)", missing
+          data legend accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "US Population Density · choropleth-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows all 50 states plus DC, demonstrates full density range from
+          sparse (WY: 6) to dense (NJ: 1263), demonstrates missing data handling.
+          Minor: could show more regional variation in missing data.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US population density is a classic, neutral, real-world use case
+          for choropleth maps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic approximations of actual US state population
+          densities. Most values are accurate, though some minor discrepancies exist.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: 'Data is deterministic (hardcoded), so no random seed is needed. Minor:
+          very long inline data definitions could benefit from cleaner organization.'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.patches, pyplot, PatchCollection)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of matplotlib-specific features (FancyBboxPatch with rounded
+          corners, PatchCollection for efficient rendering, ScalarMappable for colorbar).
+          Could use additional matplotlib features like custom projections or annotations.
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/plotly.yaml b/plots/choropleth-basic/metadata/plotly.yaml
index 88cda1a524..b794a4c666 100644
--- a/plots/choropleth-basic/metadata/plotly.yaml
+++ b/plots/choropleth-basic/metadata/plotly.yaml
@@ -23,3 +23,168 @@ review:
   - Title format includes extra prefix before the spec-id pattern
   - Country border lines could be slightly thicker for better boundary visibility
   - Could add hover templates to enhance interactive HTML with formatted GDP values
+  image_description: The plot displays a choropleth map of Europe showing GDP per
+    capita data using the Viridis color scale. Colors range from dark purple (low
+    GDP ~10-20k USD) to bright yellow (high GDP ~120k USD). Luxembourg and Ireland
+    appear in bright yellow/green (highest GDP), Scandinavian countries in teal/green,
+    Western Europe (Germany, France, UK) in medium teal, and Eastern European countries
+    (Serbia, Bulgaria, Romania) in dark purple (lowest GDP). Russia and non-included
+    countries appear in gray (missing data). The title is centered at the top, a vertical
+    colorbar legend on the right shows the GDP scale, coastlines are visible with
+    light blue oceans, and country borders have subtle gray outlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, colorbar label at 20pt, tick labels at 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Countries clearly visible with good color differentiation; borders
+          slightly thin but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is a colorblind-safe sequential palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Map fills canvas well with balanced margins, colorbar well positioned
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar legend well placed, subtle coastlines
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Region IDs (ISO-3 codes) correctly mapped to GDP values
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has color legend, clear boundaries, handles missing data (gray);
+          could use more prominent missing data handling per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 30 countries displayed, full color range utilized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately reflects data range
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes spec-id, library, and pyplots.ai but adds contextual
+          prefix
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good variation from low GDP (Serbia ~9k) to high (Luxembourg
+          ~125k); covers Western, Northern, Southern, Eastern Europe
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GDP per capita is a perfect economic indicator example, neutral topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic and match actual approximate GDP per capita
+          figures
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) though data is deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, plotly.express)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly Express API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses px.choropleth with good geographic customization (scope, projection),
+          generates interactive HTML; could leverage more Plotly features like hover
+          customization
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/plotnine.yaml b/plots/choropleth-basic/metadata/plotnine.yaml
index 9cf0bca131..e51624f54d 100644
--- a/plots/choropleth-basic/metadata/plotnine.yaml
+++ b/plots/choropleth-basic/metadata/plotnine.yaml
@@ -23,3 +23,178 @@ review:
   - Country shapes are highly stylized polygons rather than realistic geographic shapes
   - Legend colorbar could benefit from more tick marks for finer granularity
   - Some white space at image corners could be better utilized
+  image_description: The plot shows a stylized choropleth map of European countries
+    using simplified polygon shapes. The map displays population density data with
+    a sequential Blues colormap. The Netherlands appears darkest blue (~521 per km²),
+    followed by Belgium and UK in medium-dark blue, while Nordic countries (Norway,
+    Sweden, Finland) appear lightest. Czechia is shown in gray to demonstrate missing
+    data handling. Each country is labeled with bold black text, with labels adjusted
+    to avoid overlap in crowded central Europe. The title "choropleth-basic · plotnine
+    · pyplots.ai" is displayed at the top in large bold text. The legend on the right
+    shows "Population Density (per km²)" with a vertical colorbar ranging from 0 to
+    500. The background is light blue (#f0f5fa) and country boundaries are clearly
+    visible with dark gray outlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title large and readable at 28pt, country labels clearly visible
+          at size 8 with bold weight, legend text appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Label offsets successfully prevent overlap in crowded central Europe
+          region (Netherlands, Belgium, Germany, Denmark)
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Polygons are well-sized with good alpha (0.95), boundaries clearly
+          visible with 0.6pt stroke
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of 16:9 canvas, map centered but some empty space at corners
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Axes hidden as appropriate for map; legend has proper units "(per
+          km²)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed on right with appropriate sizing; grid removed
+          as appropriate for map type
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map implementation using polygons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Density values correctly mapped to fill color
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has color legend, clear boundaries, missing data handling (gray);
+          projection is approximate but acceptable for stylized map
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Color scale encompasses all data values (0-550 for max 521)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Population Density (per km²)"
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but uses middot (·) instead of standard bullet
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full range from low density (Norway 15) to high density (Netherlands
+          521), includes missing data example (Czechia)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real European countries with realistic 2024 population density values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are accurate for actual country population densities
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_polygon, aes mapping, scale_fill_cmap,
+          coord_fixed, and theme customization; however plotnine doesn't have native
+          geographic support so this is a creative workaround rather than using distinctive
+          features
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/pygal.yaml b/plots/choropleth-basic/metadata/pygal.yaml
index 3463afd236..d59daa45a7 100644
--- a/plots/choropleth-basic/metadata/pygal.yaml
+++ b/plots/choropleth-basic/metadata/pygal.yaml
@@ -22,3 +22,176 @@ review:
   weaknesses:
   - Missing data countries shown as white rather than gray as suggested in spec
   - Could leverage pygal's built-in tooltip/hover interactivity more prominently
+  image_description: 'The plot displays a world choropleth map showing GDP per capita
+    data across various countries. Countries are shaded in a sequential blue color
+    scheme ranging from very light blue (GDP < $10k) through progressively darker
+    blues to dark navy (GDP > $50k). The title "choropleth-basic · pygal · pyplots.ai"
+    appears at the top. High-income countries like Canada, Norway, Australia, and
+    Japan are shown in dark blue, while developing economies in Africa and parts of
+    Asia appear in lighter shades. A legend at the bottom displays four bins: GDP
+    < $10k, GDP $10k-$25k, GDP $25k-$50k, and GDP > $50k. Countries without data remain
+    white/unfilled with visible gray borders.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and legend text are readable, though legend text could be slightly
+          larger for optimal clarity at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Country regions are well-defined with visible boundaries, colors
+          clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue palette is colorblind-safe and provides good distinction
+          between values
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, map fills most of the space, minor bottom
+          margin due to Antarctica exclusion
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for choropleth maps (no axes), but legend provides value context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed at bottom in 4 columns, country borders are
+          subtle
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct choropleth map type showing regional coloring
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Countries correctly mapped to GDP per capita values
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Color legend present with value ranges; missing data shown as white
+          (could be gray as spec suggests)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values represented across the four bins
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe the GDP ranges
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows diverse geographic distribution across all continents with
+          varied income levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GDP per capita is a real, neutral economic indicator; data ranges
+          are plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values range from ~$1k to ~$85k which matches realistic GDP per capita
+          figures
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, Style, World)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal_maps_world API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html (both outputs created)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal_maps_world extension for choropleth, custom Style, legend
+          configuration; could leverage tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/choropleth-basic/metadata/seaborn.yaml b/plots/choropleth-basic/metadata/seaborn.yaml
index 6a8afe3114..5bd617d4ac 100644
--- a/plots/choropleth-basic/metadata/seaborn.yaml
+++ b/plots/choropleth-basic/metadata/seaborn.yaml
@@ -26,3 +26,179 @@ review:
   - Missing data handling (gray hatched cells) not visible in rendered image
   - Subtitle text could be slightly larger for better readability
   - Could show wider range of values to demonstrate more color scale variation
+  image_description: The plot displays a tile grid choropleth map of US states showing
+    GDP growth rates. Each state is represented as a square tile arranged in an approximate
+    geographic layout. The color scheme uses YlGnBu (yellow-green-blue sequential
+    palette) where lighter yellow/green indicates lower growth rates (~0-1%) and darker
+    blue indicates higher growth rates (~4-5%). Each tile contains the state abbreviation
+    in bold black text and the GDP growth percentage below it. A vertical colorbar
+    on the right shows "GDP Growth Rate (%)" ranging from 0 to 5. The title reads
+    "US States Economic Growth · choropleth-basic · seaborn · pyplots.ai". Alaska
+    and Hawaii are positioned in the bottom left corner. A subtitle at the bottom
+    reads "Stylized grid representation of US states colored by annual GDP growth
+    rate".
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'State codes and values are clearly readable; title and colorbar
+          labels appropriately sized. Minor: subtitle could be slightly larger'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean separation between tiles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized, good use of space, all states clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: YlGnBu is colorblind-friendly sequential palette, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, tiles fill canvas well; slight empty space on right
+          side
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Not applicable for tile grid maps (no axes), but colorbar label is
+          present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar well-placed, subtle white grid lines between tiles
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct implementation of choropleth concept using tile grid cartogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: GDP growth rate correctly mapped to color intensity
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has color legend, regional coloring, clear boundaries. Missing:
+          hatched pattern for missing data not visible in rendered image'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values visible within 0-5% range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows value range
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes spec-id, library, and pyplots.ai but adds extra prefix
+          "US States Economic Growth"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across all states with low (0.4% AK) to high (4.5%
+          UT) values. Good regional patterns (tech states higher)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GDP growth rates are a realistic, neutral economic indicator; values
+          are plausible for state economies
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0.4-4.5% are realistic GDP growth rates; slightly narrow range
+          could show more variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no unnecessary functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Code references features but rendered image differs; potential API
+          compatibility issue
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.heatmap which is seaborn's strength, sns.set_theme for styling.
+          However, choropleth maps are not seaborn's primary use case; implementation
+          is creative but could leverage more statistical features
+  verdict: APPROVED
diff --git a/plots/circlepacking-basic/metadata/bokeh.yaml b/plots/circlepacking-basic/metadata/bokeh.yaml
index a679e6e4ab..4fe54949f9 100644
--- a/plots/circlepacking-basic/metadata/bokeh.yaml
+++ b/plots/circlepacking-basic/metadata/bokeh.yaml
@@ -27,3 +27,171 @@ review:
     is small (14 nodes vs spec suggested 20-200)
   - Slight layout imbalance with empty space at bottom of the canvas
   - HTML output is generated alongside PNG which is acceptable but not strictly required
+  image_description: 'The plot displays a circle packing chart titled "circlepacking-basic
+    · bokeh · pyplots.ai". It shows a hierarchical structure of a technology company
+    (TechCorp) budget allocation. The outermost circle is blue (#306998) representing
+    the root company level. Inside are three yellow (#FFD43B) division circles: Engineering
+    (largest, lower-left), Sales (lower-right), and Operations (upper-center). Within
+    each division are teal/cyan (#4ECDC4) team circles - Engineering contains Backend,
+    Frontend, Mobile, and DevOps; Sales contains Enterprise, SMB, and Partners; Operations
+    contains Finance, HR, and Legal. Circle sizes are proportional to budget values.
+    Labels are displayed in bold black text inside each circle. A legend in the top-right
+    corner explains the color scheme: Root (Company), Division, and Team. The background
+    is light gray (#FAFAFA) and the canvas is square (3600x3600).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, labels at 28pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or circles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circle sizes well-adapted to hierarchy, good alpha values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/teal is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight empty space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circle packing (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, clear, good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct circle packing chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Parent/child relationships correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Nested circles, size by value, color by depth, labels for larger
+          circles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All hierarchy levels visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies depth levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: circlepacking-basic · bokeh · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 3 hierarchy levels, varying circle sizes, but only 14 nodes
+          (spec suggests 20-200)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company budget allocation is plausible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values in millions are realistic
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: No functions/classes but code is somewhat complex with inline algorithm
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also saves HTML (acceptable for Bokeh)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool, LabelSet, Legend - good Bokeh features
+          but could leverage more interactivity
+  verdict: APPROVED
diff --git a/plots/circlepacking-basic/metadata/letsplot.yaml b/plots/circlepacking-basic/metadata/letsplot.yaml
index 192033e010..5f808355f9 100644
--- a/plots/circlepacking-basic/metadata/letsplot.yaml
+++ b/plots/circlepacking-basic/metadata/letsplot.yaml
@@ -25,3 +25,173 @@ review:
     flat KISS structure
   - Some child circle labels (especially in smaller circles) could be slightly larger
     for better readability
+  image_description: |-
+    The plot displays a circle packing chart showing a "Storage Breakdown" visualization. A large dark navy blue root circle contains three main category circles: **Media** (yellow/gold), **Code** (green), and **Documents** (blue). Each parent circle contains nested child circles representing subfolders:
+    - **Media** (largest): Contains Videos (largest, pale yellow), Photos, and Music
+    - **Code**: Contains Projects (largest), Libraries, and Backups
+    - **Documents**: Contains Work, Personal, and Archive
+
+    The circles use white borders/outlines with subtle transparency (alpha ~0.92). Parent category labels ("Media", "Code", "Documents") appear in bold white text, while child labels are in dark gray. The title "Storage Breakdown · circlepacking-basic · letsplot · pyplots.ai" appears at the top. Circle sizes are proportional to storage values. The color scheme uses distinct hues (blue/yellow/green) with lighter shades for children within each category.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and parent labels are clear; child labels are readable but
+          some could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All circles packed without overlap, labels are clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized and proportional, white outlines help distinguish
+          nested circles
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green scheme is colorblind-safe (no red-green confusion)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with the root circle filling most of the
+          space, slight asymmetry in lower-right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circle packing (no axes needed), but no metric indicator
+          for values
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed as labels are on circles; clean background
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct circle packing chart with nested hierarchy
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Circle area proportional to values, hierarchy correctly represented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchical nesting, efficient packing, color by category,
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the root circle
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels correct but no legend explaining size metric
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Storage Breakdown · circlepacking-basic ·
+          letsplot · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 3 categories with varied child counts and sizes; could have
+          more variation in depth
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system storage is a perfect, neutral real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: GB values (8-65) are realistic for folder sizes
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper functions instead of flat script (create_circle_points,
+          pack_circles_in_parent)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_polygon for custom shapes, scale_fill_manual,
+          coord_fixed, element_blank for clean theme, ggsave with scale parameter
+  verdict: APPROVED
diff --git a/plots/circlepacking-basic/metadata/matplotlib.yaml b/plots/circlepacking-basic/metadata/matplotlib.yaml
index 127b3ef1c3..440318d024 100644
--- a/plots/circlepacking-basic/metadata/matplotlib.yaml
+++ b/plots/circlepacking-basic/metadata/matplotlib.yaml
@@ -26,3 +26,174 @@ review:
     it hard to identify specific teams
   - Could benefit from showing team names at least for the larger team circles (e.g.,
     Backend at 35 employees)
+  image_description: 'The plot shows a circle packing chart with a company organizational
+    hierarchy. There is one large light blue root circle labeled "Company" in the
+    center. Inside this root circle are four yellow department circles arranged in
+    quadrants: "Engineering" (upper left), "Product" (upper right), "Sales" (lower
+    left), and "Operations" (lower right). Each department circle contains 3-4 smaller
+    light blue team circles representing individual teams. The title "circlepacking-basic
+    · matplotlib · pyplots.ai" appears at the top in bold black text. A legend in
+    the upper right shows "Company (Root)" in blue, "Departments" in yellow, and "Teams"
+    in light blue. The circles are all properly nested without overlapping, and sizing
+    appears proportional to values.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and department labels are clear and readable; team circles
+          lack labels but this is appropriate given their smaller size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All circles are properly nested and contained; no overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Circles are well-sized and visible; team circles could be slightly
+          larger for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Circle fills canvas well with balanced padding; good use of square
+          format
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circle packing (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed and clearly explains hierarchy levels
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct circle packing chart with nested hierarchy
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to circle areas using sqrt scaling
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Hierarchical nesting, proportional sizing, labels present; hover
+          interaction not applicable for static matplotlib
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three hierarchy levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 3-level hierarchy with varying branch sizes; could show more
+          variation in team sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company organizational structure is an excellent, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Team sizes (6-35 employees) are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → layout → plotting → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves to 'plot.png' (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of matplotlib.patches.Circle and proper painter's algorithm
+          for layering; could leverage more advanced features like PathCollection
+          or custom transforms
+  verdict: APPROVED
diff --git a/plots/circlepacking-basic/metadata/plotly.yaml b/plots/circlepacking-basic/metadata/plotly.yaml
index c67830cb55..f9a12308e9 100644
--- a/plots/circlepacking-basic/metadata/plotly.yaml
+++ b/plots/circlepacking-basic/metadata/plotly.yaml
@@ -25,3 +25,173 @@ review:
   - Leaf-level nodes (individual files) are not visible in the rendered output
   - Helper functions violate the KISS principle (should be flat script structure)
   - Some minor text overlap between subcategory and category labels
+  image_description: 'The plot displays a circle packing chart visualizing file storage
+    hierarchy. There are 4 main category circles arranged in a 2x2 grid pattern: **Documents**
+    (blue, 520 MB), **Media** (yellow, 1070 MB), **Projects** (blue, 500 MB), and
+    **System** (yellow, 280 MB). Each category contains nested subcategory circles
+    with labels and values (e.g., Reports 180, Photos 340, Videos 450). The title
+    reads "Storage Analysis · circlepacking-basic · plotly · pyplots.ai" at the top.
+    Colors alternate between blue (#306998/#4B8BBE) and yellow (#FFD43B) for categories,
+    with lighter shades for subcategories. White borders clearly separate hierarchy
+    levels.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and category labels clear; some subcategory labels slightly
+          small but readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; labels well positioned within circles
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Circles well-sized and clearly visible; subcategories appropriately
+          scaled
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 4 categories arranged in grid; some empty space in corners but overall
+          balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circle packing (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean white background; no distracting grid needed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct circle packing chart with nested hierarchy
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Size correctly proportional to values (area encoding)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows 2-level hierarchy; missing deepest leaf level visible in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within circles
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Category labels accurate
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Has "circlepacking-basic · plotly · pyplots.ai" but also includes
+          "Storage Analysis" prefix
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchy well; 2 levels visible but spec mentions 2-4 levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File storage visualization is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: MB values plausible; some inconsistency between displayed totals
+          and subcategory sums
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally follows imports→data→plot→save; inline packing logic is
+          verbose but avoids functions
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Leverages Plotly shapes for circles, annotations for labels, hover
+          interactivity via scatter traces, and HTML export
+  verdict: APPROVED
diff --git a/plots/circlepacking-basic/metadata/plotnine.yaml b/plots/circlepacking-basic/metadata/plotnine.yaml
index 973230ceaa..8261b66483 100644
--- a/plots/circlepacking-basic/metadata/plotnine.yaml
+++ b/plots/circlepacking-basic/metadata/plotnine.yaml
@@ -26,3 +26,174 @@ review:
   - Manual circle positioning rather than algorithmic packing means circles do not
     pack as tightly as possible
   - Slight empty space at bottom of the root circle could be better utilized
+  image_description: 'The plot displays a circle packing chart representing a company
+    organizational structure. A large light gray root circle (labeled "Root" in legend)
+    encompasses the entire visualization. Inside it are three medium-sized blue circles
+    representing departments: Engineering (top center, largest), Operations (bottom
+    left), and Product (bottom right). Each department contains smaller yellow circles
+    representing teams: Engineering has Backend, Frontend, and DevOps; Operations
+    has Finance, Legal, and HR; Product has Design, PM, and Research. The title "circlepacking-basic
+    · plotnine · pyplots.ai" appears at the top in bold black text. A horizontal legend
+    at the bottom shows the hierarchy levels with colored squares: Root (gray), Departments
+    (blue), and Teams (yellow). Circle sizes correctly reflect the hierarchical values
+    with Engineering being the largest department.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and bold at 28pt, labels are readable at 10-12pt,
+          all text clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well-positioned within or near circles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circle sizes properly differentiated, good alpha at 0.92
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Gray/blue/yellow palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas, circles well-centered, slight empty space
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circle packing (no axes), giving full credit
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, clean theme_void removes grid appropriately
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct circle packing chart with nested circles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Parent-child hierarchy correctly mapped, sizes proportional to values
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows hierarchy levels with color encoding; manual layout instead
+          of force simulation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within root circle
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes hierarchy levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: circlepacking-basic · plotnine · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 3-level hierarchy, different sized nodes; could show more variation
+          in team sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company organizational structure is realistic and neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Team sizes (8-20 people) and department totals are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_polygon and geom_text, theme_void,
+          scale_fill_manual; creative use of plotnine for non-standard chart type
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/altair.yaml b/plots/circos-basic/metadata/altair.yaml
index fe6cc3f8d3..96432fda1b 100644
--- a/plots/circos-basic/metadata/altair.yaml
+++ b/plots/circos-basic/metadata/altair.yaml
@@ -25,3 +25,171 @@ review:
     harder to trace
   - Inner track bars could have more visible separation from each other
   - Legend shows segment colors but not inner track or ribbon meaning
+  image_description: 'The plot displays a Circos-style circular visualization showing
+    software module dependencies. Eight segments are arranged around a circle: Core
+    (blue), API (yellow), Database (green), Auth (crimson), Cache (purple), Queue
+    (teal), Logger (orange), and Config (gray). Each segment''s arc length is proportional
+    to its relative size/importance. An inner concentric track shows darker-shaded
+    bars representing activity/importance values. Semi-transparent ribbons connect
+    segments through the center, with curved Bezier paths showing dependencies between
+    modules (e.g., Core-API, Core-Database, API-Auth). Labels are positioned outside
+    each segment in matching colors. The title "circos-basic · altair · pyplots.ai"
+    appears at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable at output resolution; slightly
+          smaller than optimal
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Segments, tracks, and ribbons are visible; some ribbons could be
+          slightly more distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between segments
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Circular plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Circos plots (no axes), but segment labels present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-positioned on right side with clear module names
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Circos plot with circular segments and ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target connections properly mapped with ribbon widths
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has segments, ribbons, inner track; gaps between segments present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments and connections visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all modules
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: circos-basic · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows segments, connections, inner track data; varying ribbon widths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependencies is a realistic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 8 segments with 14 connections is appropriate; segment sizes plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: mark_line with filled=True is non-standard for area shapes'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses layered composition, tooltips, and declarative encoding; however,
+          Circos requires manual geometry calculation rather than leveraging Altair's
+          strengths
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/bokeh.yaml b/plots/circos-basic/metadata/bokeh.yaml
index df54329fd0..d6d027def1 100644
--- a/plots/circos-basic/metadata/bokeh.yaml
+++ b/plots/circos-basic/metadata/bokeh.yaml
@@ -25,3 +25,177 @@ review:
   - Title font at 48pt appears relatively small for the 3600x3600 canvas size
   - Some ribbon colors could use slightly more differentiation when many overlap in
     the center
+  image_description: 'The plot displays a Circos diagram showing trade flows between
+    six global economic regions (Asia, Europe, N. America, S. America, Africa, Oceania).
+    The circular layout features an outer ring with colored segments representing
+    each region - blue for Asia, yellow for Europe, red-coral for N. America, green
+    for S. America, purple for Africa, and orange for Oceania. Region labels are positioned
+    outside the circle, properly rotated for readability. Inside the outer ring is
+    a secondary track showing GDP growth rates as lighter-colored bars of varying
+    heights. The center contains semi-transparent ribbons connecting regions, with
+    ribbon width proportional to trade flow volume. The ribbons use quadratic bezier
+    curves passing through the center. A legend is positioned on the right side with
+    colored boxes and region labels. The title "circos-basic · bokeh · pyplots.ai"
+    appears at the top center. At the bottom-left corner, there is small text noting
+    "Inner track: GDP Growth (%)".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and region labels are readable, legend text is clear; inner
+          track label at bottom-left is somewhat small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, labels well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Outer segments and ribbons are visible, though some ribbons overlap
+          each other due to transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for each region, colorblind-friendly palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Circular plot is well-centered, good use of canvas; legend placement
+          is good
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: N/A for circular plots, but track label present (partial)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed with clear labels, reference circle subtle
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Circos plot with circular segments and connecting ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target regions and values correctly mapped to ribbons
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has outer segments, ribbons, and inner track; ribbons show relationships
+          well
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All regions visible with proportional sizing
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all six regions
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but title font could be larger
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows connections, segment sizing, inner track; demonstrates various
+          ribbon widths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Regional trade flows is a compelling real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Trade values in billions USD are realistic; GDP growth percentages
+          are plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses export_png which is current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of patches, ColumnDataSource, and bezier curves; exports
+          both PNG and HTML
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/highcharts.yaml b/plots/circos-basic/metadata/highcharts.yaml
index 4025d8fb30..0260f4a2c1 100644
--- a/plots/circos-basic/metadata/highcharts.yaml
+++ b/plots/circos-basic/metadata/highcharts.yaml
@@ -24,3 +24,176 @@ review:
   - Minor label overlap at N.America segment where multiple ribbons converge
   - Inner pie chart percentage labels are somewhat small and could be more prominent
   - Missing explicit gaps between outer ring segments as mentioned in specification
+  image_description: The plot displays a circular Circos-style visualization showing
+    global trade flows between 8 regions. The outer ring is a dependency wheel chart
+    with ribbons/chords connecting regions - segments include N.America (dark blue),
+    S.America (yellow), Europe (purple), Africa (cyan), MidEast (brown), S.Asia (pink),
+    E.Asia (green), and Oceania (olive). The inner track shows a donut/pie chart displaying
+    GDP share percentages with white text labels (26%, 28%, 22%, etc.). The title
+    "circos-basic · highcharts · pyplots.ai" appears at the top with a subtitle "Global
+    Trade Flows Between Regions (with GDP Inner Track)". A legend on the right side
+    shows all 8 regions with their corresponding colors. The visualization effectively
+    uses the dependency wheel chart type with an inner pie chart to create a Circos-like
+    layout.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, and region labels are clear and readable. Inner
+          pie chart percentages are visible but slightly small.
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor overlap at "N.America" label where multiple ribbons converge,
+          but overall very clean.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ribbons are well-sized and proportional to flow values, inner track
+          clearly visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses a colorblind-safe palette with distinct colors for all 8 regions.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 3600x3600 square canvas, chart well-centered with
+          appropriate margins for legend.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is present and well-placed, but no grid (appropriate for this
+          chart type, but could have subtle track guides).
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a Circos-style visualization using dependency
+          wheel + inner pie track.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target/value correctly mapped to ribbon connections.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has segments, ribbons proportional to values, distinct colors, inner
+          track. Missing: gaps between segments as specified.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All regions and connections properly displayed.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to region names.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format "circos-basic · highcharts · pyplots.ai" but uses
+          `·` from Highcharts which renders correctly.
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple flow strengths (N.America-E.Asia strong, others varied),
+          inner GDP track shows economic context. Could show more variation in flow
+          magnitudes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Global trade flows between regions is a perfect, realistic scenario
+          for Circos visualization.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: GDP percentages sum to 100%, trade flow values are plausible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → render.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible flow generation.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, json, tempfile, selenium, etc.).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using direct chart config instead of highcharts_core library classes
+          (works but less idiomatic).
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts dependency wheel module with multiple
+          series composition (wheel + pie). Could use more Highcharts-specific features
+          like animations or tooltips in static export.
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/letsplot.yaml b/plots/circos-basic/metadata/letsplot.yaml
index b52cf8185a..451063cfa3 100644
--- a/plots/circos-basic/metadata/letsplot.yaml
+++ b/plots/circos-basic/metadata/letsplot.yaml
@@ -26,3 +26,173 @@ review:
   - Could reduce whitespace at bottom of plot
   - Interactive features of letsplot not fully utilized (tooltips would enhance the
     visualization)
+  image_description: 'The plot displays a circular Circos visualization with 8 chromosomes
+    (Chr1-Chr8) arranged around the outer ring. The outer ring shows colored segments
+    in distinct colors: Chr1 (blue #306998), Chr2 (yellow #FFD43B), Chr3 (green #27AE60),
+    Chr4 (red #E74C3C), Chr5 (purple #9B59B6), Chr6 (teal #1ABC9C), Chr7 (orange #F39C12),
+    and Chr8 (light blue #3498DB). The segments are proportionally sized based on
+    chromosome size, with Chr1 being the largest and Chr8 the smallest.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, chromosome labels are bold and easily readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments, tracks, and ribbons all clearly visible with appropriate
+          sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 8 distinct colors with good contrast, colorblind-friendly palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with circular plot centered, minor whitespace
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circos plots (no traditional axes), chromosome labels present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean background, legend at bottom is well-organized
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Circos plot with circular layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target connections properly mapped with bezier ribbons
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: outer ring, gaps between segments, ribbons
+          proportional to value, distinct colors, 2 concentric data tracks'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible with proportional sizing
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Chromosome legend correct
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses descriptive title with spec-id and library, but format is "Genomic
+          Rearrangements · circos-basic · letsplot · pyplots.ai" instead of "{spec-id}
+          · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows chromosomes, connections, and expression tracks; good variety
+          in connection strengths and expression values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Genomic rearrangement scenario is realistic and matches spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Chromosome sizes (50-120 Mb) and expression values (0-1) are realistic;
+          connection values are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly follows imports → data → plot → save, but builds complex intermediate
+          data structures
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_polygon effectively for building circular elements, scale_fill_manual
+          for custom colors, coord_fixed for circular aspect ratio, ggsize for sizing.
+          Could have explored more interactive features.
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/matplotlib.yaml b/plots/circos-basic/metadata/matplotlib.yaml
index c5da1fe199..7dcafea1d2 100644
--- a/plots/circos-basic/metadata/matplotlib.yaml
+++ b/plots/circos-basic/metadata/matplotlib.yaml
@@ -25,3 +25,174 @@ review:
   - Legend positioned outside main canvas area requiring bbox_inches=tight - could
     be integrated more elegantly
   - Could use matplotlib built-in Wedge/Arc patches for cleaner segment drawing
+  image_description: 'The plot displays a circular Circos visualization showing software
+    module dependencies. Eight segments are arranged around a circular ring: Core
+    (blue), API (yellow), Database (green), Auth (crimson), Cache (purple), Queue
+    (teal), Logger (orange), and Config (gray). Each segment is proportionally sized
+    based on module importance. The outer ring shows the segments with white separating
+    gaps. An inner concentric track displays bar heights representing importance values
+    for each segment. Curved ribbon connections in the center show dependencies between
+    modules, with ribbon width proportional to connection strength. The title "circos-basic
+    · matplotlib · pyplots.ai" appears at the top. A legend box labeled "Modules"
+    is positioned in the lower right, listing all eight modules with their corresponding
+    colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt, segment labels at 18pt bold, legend at 14pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, segments well-spaced with gaps
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Segments and ribbons clearly visible, inner track bars well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Eight distinct colors that are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Circular plot well-centered, good use of canvas with legend placed
+          appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Circos plots (no traditional axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-positioned, clean styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Circos plot with circular segments and ribbon connections
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target connections properly mapped, segment sizes reflect
+          importance
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has outer ring, inner data track, and ribbon connections as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments and connections visible and proportional
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly lists all 8 modules with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "circos-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied segment sizes, multiple connection strengths, inner
+          track variations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependencies is a real, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 8 segments with 14 connections is appropriate; connection values
+          realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of PathPatch for Bezier curves, custom polygon drawing with
+          fill(), but could leverage matplotlib's Arc or Wedge patches more
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/plotly.yaml b/plots/circos-basic/metadata/plotly.yaml
index a27cc2a3d2..f873c36e31 100644
--- a/plots/circos-basic/metadata/plotly.yaml
+++ b/plots/circos-basic/metadata/plotly.yaml
@@ -27,3 +27,177 @@ review:
     · pyplots.ai not Regional Trade Flows · circos-basic · plotly · pyplots.ai'
   - Helper function blend_colors() violates strict KISS principle (imports → data
     → plot → save)
+  image_description: The plot displays a Circos visualization showing regional trade
+    flows between 8 world regions. The outer ring contains colored arc segments for
+    North America (dark blue), Europe (golden yellow), East Asia (red), South America
+    (green), Africa (purple), Middle East (orange), South Asia (teal), and Oceania
+    (light blue). Each segment is proportionally sized based on economic importance.
+    Curved ribbon connections pass through the center, using blended colors from source
+    and target segments (addressing the previous feedback). An inner track displays
+    GDP index values as bar heights using the same color scheme. Labels are positioned
+    around the outer edge with adaptive text anchoring. A horizontal legend at the
+    bottom identifies all regions. The title reads "Regional Trade Flows · circos-basic
+    · plotly · pyplots.ai" - note this still includes an extra descriptor before the
+    spec-id.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels readable, font size adequate at 16pt for segment
+          labels
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor cramping on "Middle East" and "Africa" labels due to proximity,
+          but still readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments, ribbons, and inner track all clearly visible with good
+          sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 8 distinct colors, well differentiated, no red-green confusion
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills substantial area, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for circular plot, no axis labels needed (deducting 0 as circular
+          plots don't have axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well positioned horizontally at bottom, clean without grid
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Circos plot with segments, ribbons, and inner track
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target connections correctly mapped as ribbons, values determine
+          width
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has segments, ribbons, inner track, gaps, proportional sizing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 8 segments visible with all connections shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 8 regions correctly listed with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes extra "Regional Trade Flows" prefix, should be just
+          "circos-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows segment sizing, ribbons, inner track; ribbon widths vary appropriately
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Trade flows between world regions is excellent real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Trade values reasonable; segment sizes proportional to economic importance
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Has helper function blend_colors() which deviates from pure KISS
+          style
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set properly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Scatter with fill='toself' for shapes, interactive HTML
+          output, but could leverage more Plotly-specific features
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/plotnine.yaml b/plots/circos-basic/metadata/plotnine.yaml
index 6800946ac7..b189ec91cf 100644
--- a/plots/circos-basic/metadata/plotnine.yaml
+++ b/plots/circos-basic/metadata/plotnine.yaml
@@ -27,3 +27,184 @@ review:
     different attribute data
   - Some chord ribbons are quite thin and could be more visible
   - Slight layout imbalance with more empty space at bottom of canvas
+  image_description: 'The plot shows a circos/chord diagram visualizing trade flows
+    between 6 world regions: Europe (yellow), North America (green), South America
+    (orange), Middle East (red), Africa (purple), and Asia (blue). The circular layout
+    has an outer ring showing each region as a colored arc segment, with segment sizes
+    proportional to total trade volume. An inner concentric track (lighter shading)
+    provides an additional data layer. Ribbons/chords connect regions through the
+    center, with ribbon width proportional to trade flow values. The title "circos-basic
+    · plotnine · pyplots.ai" appears at the top. Region labels are positioned outside
+    the ring in bold dark text. A legend on the right shows the color mapping for
+    each region.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and clear (24pt), region labels are bold and readable,
+          legend text is appropriately sized. Slightly deducted as some labels could
+          be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; labels are well-positioned outside
+          the ring
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Ribbons are visible with good alpha (0.5), arcs are clear. Some thinner
+          ribbons could be slightly more prominent.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Six distinct colors (blue, yellow, green, orange, red, purple) with
+          good contrast, avoiding problematic red-green only distinctions
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot is centered with legend on right. Slight
+          imbalance with more whitespace on bottom.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for circular plot (no axes), but region labels serve this purpose
+          well
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed; subtle grid circles present but minimal
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct circos/chord diagram with circular layout, segments, and
+          connecting ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target correctly mapped to chord connections, values determine
+          ribbon width
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has outer ring, ribbons/chords, distinct segment colors, and inner
+          data track. Gap separation present. Minor: segment_size shown via flow totals
+          rather than explicit sizing.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 6 regions displayed, all 18 connections visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 regions with accurate color mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "circos-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bidirectional flows, varying connection strengths, multiple
+          regions. Could show more variation in inner track data.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Trade flows between world regions is an excellent, realistic, neutral
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Trade values (18-85) represent plausible relative magnitudes, though
+          units not specified
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded flow values, no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_polygon, geom_path, geom_text,
+          scale_fill_manual, and comprehensive theming. However, this is a creative
+          workaround since plotnine doesn't have native circos support - the implementation
+          builds the circular geometry manually using standard geoms.
+  verdict: APPROVED
diff --git a/plots/circos-basic/metadata/seaborn.yaml b/plots/circos-basic/metadata/seaborn.yaml
index e17d274220..1ee6bb8b48 100644
--- a/plots/circos-basic/metadata/seaborn.yaml
+++ b/plots/circos-basic/metadata/seaborn.yaml
@@ -26,3 +26,180 @@ review:
     - this is a limitation of the library for Circos plots
   - Some ribbons in the center appear quite dense and could benefit from slightly
     more transparency
+  image_description: 'The plot displays a circular Circos visualization showing global
+    trade flows between 10 world regions. The outer ring consists of colored arc segments
+    representing regions: North America (pink), East Asia (olive/yellow-green), Europe
+    (orange), South Asia (green), Middle East (teal), Southeast Asia (light green),
+    Africa (cyan/teal), Oceania (blue), South America (light purple), and Central
+    Asia (magenta). Each segment''s arc length is proportional to the region''s total
+    trade volume. An inner concentric track shows bar heights representing trade volume
+    for each region. Ribbons connect regions through the center, with widths proportional
+    to trade flow values. The title "Global Trade Flows · circos-basic · seaborn ·
+    pyplots.ai" appears at the top. Labels for each region are positioned outside
+    the ring with appropriate rotation for readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 33
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: All labels are readable, though some smaller regions have slightly
+          cramped text. Title is clear and appropriately sized.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; labels are well-positioned with
+          proper rotation
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Outer ring segments, inner track bars, and ribbons are all visible;
+          ribbon widths show good variation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses tab10 palette which provides good distinction; adjacent segments
+          have distinct colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square aspect ratio for circular plot; adequate margins
+          for labels
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean white background, legend at bottom explaining visualization
+          components
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Circos plot implementation with circular layout, segments,
+          and connection ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target connections properly mapped, segment sizes proportional
+          to trade volume
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has outer ring segments, inner data track, and connecting ribbons
+          as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments and connections visible and within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes outer ring, inner track, and ribbons
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Title format incorrect: shows "Global Trade Flows · circos-basic
+          · seaborn · pyplots.ai" but should be "circos-basic · seaborn · pyplots.ai"
+          as primary title'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 10 regions with varying segment sizes, 15 connections with
+          different strengths, inner track data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Global trade flows is an excellent real-world application for Circos
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Trade values in billions USD are reasonable; segment reordering ensures
+          visual distinction
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (seaborn, matplotlib, numpy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses context="poster" which is valid but font_scale=1.3 may cause
+          compatibility notes
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Seaborn is only used for sns.set_theme() and sns.color_palette();
+          no seaborn plotting functions are used. The actual plotting is done entirely
+          with matplotlib. This is a fundamental limitation as Circos plots are not
+          native to seaborn.
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/altair.yaml b/plots/confusion-matrix/metadata/altair.yaml
index 051f3bc69c..e38bab3c45 100644
--- a/plots/confusion-matrix/metadata/altair.yaml
+++ b/plots/confusion-matrix/metadata/altair.yaml
@@ -23,3 +23,175 @@ review:
   weaknesses:
   - Missing interactive tooltips which are a distinctive Altair feature
   - Legend positioning could be optimized to be closer to the plot
+  image_description: The plot displays a 4x4 confusion matrix heatmap for a multi-class
+    animal classification model (Dog, Cat, Bird, Fish). The matrix uses a sequential
+    blue color scheme ("blues") where darker blue indicates higher counts. The diagonal
+    cells (correct predictions) show high values (85, 78, 82, 91) with dark blue coloring,
+    while off-diagonal cells (misclassifications) show lower values with lighter blue
+    shades. Each cell is annotated with its count value - white text on dark cells
+    and dark blue (#306998) text on light cells for optimal contrast. The title "confusion-matrix
+    · altair · pyplots.ai" appears at the top. The y-axis is labeled "True Label"
+    and the x-axis "Predicted Label". A vertical colorbar on the right shows the count
+    scale from approximately 0-90. White cell borders provide clear separation between
+    matrix cells.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt, cell annotations
+          at 28pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Matrix cells are well-sized with clear white borders, annotations
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good square layout, plot fills canvas well, minor asymmetry with
+          legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Clear descriptive labels: "True Label" and "Predicted Label"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid (appropriate for heatmap), but legend could be better positioned/sized
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct confusion matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on Y-axis, Predicted labels on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Cell annotations, colorbar, clear axis labels all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All classes and values visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows count scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "confusion-matrix · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows diagonal dominance (correct predictions), off-diagonal errors,
+          varying accuracy per class (Fish highest at 91%, Cat lowest at 78%)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Animal classification is a plausible ML scenario with realistic confusion
+          patterns (Dog-Cat more confused than Fish)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Count values are realistic for a test set (~100 samples per class)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic Altair features (mark_rect, mark_text, layering) but doesn't
+          leverage interactive features like tooltips or selection that are Altair's
+          distinctive strengths
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/bokeh.yaml b/plots/confusion-matrix/metadata/bokeh.yaml
index ea5b5bf4a1..862a944e38 100644
--- a/plots/confusion-matrix/metadata/bokeh.yaml
+++ b/plots/confusion-matrix/metadata/bokeh.yaml
@@ -25,3 +25,162 @@ review:
   - 'Minor: X-axis labels have slight rotation (0.4 rad) which is not strictly necessary
     for 4 short class names'
   - 'VQ-07: Could add slightly more styling to the colorbar or include summary statistics'
+  image_description: The plot displays a 4x4 confusion matrix heatmap for a sentiment
+    analysis classifier with classes "Negative", "Neutral", "Positive", and "Very
+    Positive". The matrix uses a Blues sequential colormap ranging from light blue
+    (low values near 0) to dark navy blue (high values around 156). Each cell contains
+    bold white or dark blue text showing the count value, with contrasting colors
+    for readability. The diagonal shows strong correct predictions (142, 98, 156,
+    86), while off-diagonal cells show misclassification patterns. The title "confusion-matrix
+    · bokeh · pyplots.ai" appears at the top. Axis labels clearly show "True Label"
+    (Y-axis) and "Predicted Label" (X-axis), with class names on both axes. A colorbar
+    on the right indicates the count scale from 0-156.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text clearly readable at full size, appropriate font sizes for
+          3600x3600 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, well-spaced labels and annotations
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell rectangles perfectly sized with white borders for clear separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good square layout appropriate for confusion matrix
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive "True Label" and "Predicted Label"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid (appropriate), colorbar present but minor styling gap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct confusion matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on Y-axis, Predicted on X-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features: annotations, colorbar, sequential colormap'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Class names correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "confusion-matrix · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows diagonal dominance, adjacent class confusion, class imbalance
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sentiment analysis is a realistic multi-class scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic count values (~750 total samples)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/highcharts.yaml b/plots/confusion-matrix/metadata/highcharts.yaml
index 16a9bc2ac0..3966590539 100644
--- a/plots/confusion-matrix/metadata/highcharts.yaml
+++ b/plots/confusion-matrix/metadata/highcharts.yaml
@@ -25,3 +25,175 @@ review:
   - Layout could better utilize the square canvas - some wasted space around edges
   - np.random.seed(42) is set but data is entirely hardcoded, making the seed unnecessary
   - Colorbar scale goes to 100 but max value is 87, slightly misleading range
+  image_description: 'The plot displays a 4x4 confusion matrix heatmap for an image
+    classification model with classes: Cat, Dog, Bird, and Fish. The matrix uses a
+    sequential blue colormap ranging from light blue (low values ~0) to dark blue
+    (high values ~85-87). The diagonal shows strong correct predictions (87, 76, 82,
+    85) in dark blue, while off-diagonal misclassifications appear in lighter shades.
+    Each cell displays its count value in bold black text with white outline. The
+    title "confusion-matrix · highcharts · pyplots.ai" appears at the top with a subtitle
+    "Image Classification Model Performance". The y-axis is labeled "True Label" and
+    x-axis "Predicted Label". A vertical colorbar on the right shows the "Count" scale
+    from 0-100.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title, subtitle, axis labels, class
+          names, and cell values are all appropriately sized'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are well-sized, values clearly visible with good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue colormap is colorblind-safe, good contrast between
+          light/dark values
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but slight imbalance with colorbar placement;
+          plot is well-proportioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("True Label", "Predicted Label") but no units
+          (acceptable for categorical data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar well placed with "Count" title; white cell borders provide
+          good separation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap/confusion matrix visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on y-axis, Predicted labels on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Cell annotations, colorbar, proper axis labels all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, colorbar scale 0-100 appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled as "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "confusion-matrix · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows diagonal dominance (correct predictions), varying accuracy
+          per class, realistic misclassification patterns (Cat↔Dog, Bird↔Fish confusion)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Image classification with animal classes is a plausible ML scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable (76-87 correct predictions per class out of
+          100), though all classes have similar accuracy
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses current API but includes unused seed
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highcharts heatmap series, custom tooltip formatter,
+          color axis with stops, data labels with formatter
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/letsplot.yaml b/plots/confusion-matrix/metadata/letsplot.yaml
index b859bbae60..4b3ea76a82 100644
--- a/plots/confusion-matrix/metadata/letsplot.yaml
+++ b/plots/confusion-matrix/metadata/letsplot.yaml
@@ -22,3 +22,183 @@ review:
   weaknesses:
   - Colorbar could be positioned closer to the plot for better visual cohesion
   - Grid/Legend score impacted by colorbar distance from main plot area
+  image_description: 'The plot displays a 4x4 confusion matrix heatmap for an image
+    classification task with classes Cat, Dog, Bird, and Fish. The matrix uses a sequential
+    blue colormap (light to dark blue gradient) where darker cells indicate higher
+    counts. Each cell shows both the raw count and row-normalized percentage in bold
+    black or white text (white text on darker cells, black on lighter). The diagonal
+    cells (correct predictions) are visibly darker, showing good model performance:
+    Cat=45 (77%), Dog=52 (82%), Bird=38 (79%), Fish=41 (80%). Off-diagonal cells show
+    realistic misclassification patterns (e.g., Cat↔Dog confusion, Bird↔Fish confusion).
+    The title "confusion-matrix · letsplot · pyplots.ai" appears at the top. Y-axis
+    labeled "True Label", X-axis labeled "Predicted Label". A colorbar on the right
+    shows the count scale from ~10 to 50. The plot uses a square 1:1 aspect ratio
+    with coord_fixed().'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~28pt), axis labels are clear (~22pt), cell
+          annotations are bold and readable (~14pt scaled)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels and annotations are fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are perfectly sized for 4x4 matrix, white borders between tiles
+          provide clear separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue colormap is colorblind-safe, good contrast between
+          light/dark cells
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with square aspect ratio, slight asymmetry due
+          to colorbar placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "True Label" and "Predicted Label" but no units
+          (though units not applicable for categorical data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Panel grid is removed (good), but colorbar is somewhat distant from
+          the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap/tile-based confusion matrix visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on Y-axis, Predicted labels on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has annotations (count + percentage), colorbar, sequential colormap,
+          clear axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 4 classes visible, full matrix shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Count" with appropriate scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "confusion-matrix · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows diagonal dominance (correct predictions), off-diagonal misclassifications,
+          and realistic confusion patterns; could show more varied accuracy rates
+          between classes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Image classifier for animals is a plausible real-world scenario with
+          Cat/Dog/Bird/Fish classes
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (~50-60 samples per class, 77-82% accuracy),
+          though slightly idealized
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" and also as "plot.html", which is fine; however,
+          the code iterates with df.iterrows(), which is slow but acceptable
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_tile, scale_color_identity for text colors,
+          tooltips for interactivity (HTML version), coord_fixed(); could leverage
+          more lets-plot specific features like custom tooltips formatting
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/matplotlib.yaml b/plots/confusion-matrix/metadata/matplotlib.yaml
index 3de482f8f4..6e42db2e7f 100644
--- a/plots/confusion-matrix/metadata/matplotlib.yaml
+++ b/plots/confusion-matrix/metadata/matplotlib.yaml
@@ -22,3 +22,179 @@ review:
   weaknesses:
   - Does not leverage matplotlib-specific advanced features (just uses basic imshow)
   - Grid lines at width=2 are slightly prominent compared to subtle alpha=0.3 guidelines
+  image_description: 'The plot displays a 4x4 confusion matrix heatmap for a multi-class
+    classification scenario with classes: Cat, Dog, Bird, and Fish. The diagonal cells
+    (correct predictions) are dark blue with high values (85, 78, 88, 90), while off-diagonal
+    cells (misclassifications) are lighter shades of blue with lower values. Each
+    cell shows both the raw count and the row percentage in parentheses. The y-axis
+    is labeled "True Label" and x-axis is labeled "Predicted Label". A colorbar on
+    the right shows the "Count" scale from approximately 10 to 90. The title follows
+    the required format "confusion-matrix · matplotlib · pyplots.ai". Text uses white
+    color on dark backgrounds and black on light backgrounds for contrast. X-axis
+    labels are rotated 45 degrees. White grid lines separate the cells.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, tick labels 18pt, cell annotations 16pt
+          bold - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized, annotations clear with proper contrast (white
+          on dark, black on light)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues colormap is colorblind-safe, sequential palette appropriate
+          for count data
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format (12x12 inches) appropriate for symmetric matrix, good
+          margins, colorbar well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("True Label", "Predicted Label") but lack
+          units (not applicable for categorical data, but could indicate "Class")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: White grid lines between cells are functional; colorbar is well-placed
+          though grid is slightly prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct confusion matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on y-axis, Predicted labels on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Cell annotations with counts and percentages, Blues colormap, colorbar,
+          clear axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All cells visible and readable
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "confusion-matrix · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both correct predictions (diagonal) and misclassifications
+          (off-diagonal), varying accuracy levels across classes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Animal classification is a common real-world ML scenario, class confusion
+          patterns are realistic (Cat/Dog more confused than Fish)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for a test set (~100 samples per class, 78-90%
+          accuracy range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic imshow() for heatmap; could use more advanced features
+          such as sklearn's ConfusionMatrixDisplay (built on matplotlib) or custom
+          annotations with bounding boxes
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/plotly.yaml b/plots/confusion-matrix/metadata/plotly.yaml
index 103772ce0a..5e2cbe8e0d 100644
--- a/plots/confusion-matrix/metadata/plotly.yaml
+++ b/plots/confusion-matrix/metadata/plotly.yaml
@@ -23,3 +23,174 @@ review:
   - Image dimensions use square format (3600x3600) instead of standard 4800x2700 landscape,
     though appropriate for confusion matrices
   - Axis labels could be more descriptive (e.g., True Sentiment Class vs True Label)
+  image_description: 'The plot displays a 4×4 confusion matrix heatmap for a sentiment
+    analysis classifier with four classes: Negative, Neutral, Positive, and Very Positive.
+    The matrix uses a sequential Blues colormap ranging from light (low values ~1-15)
+    to dark blue (high values ~70-85). The y-axis shows "True Label" and x-axis shows
+    "Predicted Label". Each cell contains white or black numeric annotations showing
+    the count values (white text on dark backgrounds, black text on light backgrounds).
+    The title "confusion-matrix · plotly · pyplots.ai" is centered at the top. A colorbar
+    on the right shows the "Count" scale from approximately 1-85. The diagonal cells
+    (correct predictions) are darker blue, clearly showing the model performs well
+    with most predictions on the diagonal. The matrix is square with equal cell dimensions,
+    and all text is clearly legible.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, cell annotations
+          at 24pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized, colormap distinguishes values effectively
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues colormap is colorblind-safe sequential palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square layout with good margins, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("True Label", "Predicted Label") but no units
+          needed for categorical data
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar well placed with appropriate sizing, no grid needed for
+          heatmap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct confusion matrix heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on Y-axis, Predicted labels on X-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Annotated cells with counts, colorbar, sequential colormap, clear
+          axis labeling
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows count scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "confusion-matrix · plotly · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows true positives on diagonal, off-diagonal misclassifications,
+          adjacent class confusion pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sentiment analysis is a real, comprehensible scenario with 4 classes
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Counts in realistic range (1-85), class sizes balanced
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Custom hovertemplate for interactivity, graph_objects for fine control,
+          plotly_white template, scale anchoring for square cells
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/plotnine.yaml b/plots/confusion-matrix/metadata/plotnine.yaml
index 755f333d49..1c4f6a4965 100644
--- a/plots/confusion-matrix/metadata/plotnine.yaml
+++ b/plots/confusion-matrix/metadata/plotnine.yaml
@@ -24,3 +24,176 @@ review:
   - Could benefit from displaying percentages alongside or instead of raw counts for
     easier interpretation
   - Legend colorbar title could be more descriptive (e.g., Sample Count)
+  image_description: 'The plot displays a 3×3 confusion matrix heatmap for a sentiment
+    analysis classifier with three classes: Negative, Neutral, and Positive. The matrix
+    uses a sequential blue colormap (light blue to dark blue) from the Blues palette.
+    The diagonal cells (correct predictions) show high values (85, 72, 81) in dark
+    blue, while off-diagonal cells show lower misclassification counts in lighter
+    shades. Cell values are annotated with bold numbers - white text on dark cells
+    and dark blue text on light cells for contrast. The y-axis shows "True Label"
+    and x-axis shows "Predicted Label". A colorbar on the right indicates the count
+    scale (0-80+). The title follows the required format. The layout is square (1:1
+    aspect ratio) with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 26pt, axis titles at 22pt bold, axis text at 18pt, cell
+          annotations at size 20 bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles well-sized with good white borders, annotations highly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential Blues colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 1:1 ratio appropriate for matrix, good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "True Label" and "Predicted Label" but no units
+          (expected for categorical)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is removed (element_blank), which is appropriate, but legend
+          title "Count" could be more descriptive; colorbar well-placed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct confusion matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on y-axis, Predicted on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has cell annotations, colorbar, clear axes; missing normalization
+          option display but raw counts shown well
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents count scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses "confusion-matrix · plotnine · pyplots.ai" format (correct but
+          missing capitalization for consistency)
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows diagonal dominance (correct predictions), off-diagonal confusion,
+          varying accuracy by class (Neutral harder to classify)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sentiment analysis is a real, relatable ML scenario; class confusion
+          patterns are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Count values (3-85) are realistic for a test dataset
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" ✓ but verbose=False may hide useful output
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (aes, geom_tile, geom_text, scale_fill_gradient,
+          coord_fixed, theme_minimal). Could use scale_fill_brewer for more distinctive
+          plotnine styling, but current approach is clean.
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/pygal.yaml b/plots/confusion-matrix/metadata/pygal.yaml
index cdbcfbfea1..c461b3b025 100644
--- a/plots/confusion-matrix/metadata/pygal.yaml
+++ b/plots/confusion-matrix/metadata/pygal.yaml
@@ -24,3 +24,171 @@ review:
     (though necessary for pygal)
   - Imports from internal module (pygal.graph.graph) rather than public API
   - The sys.path manipulation for import is a workaround that adds complexity
+  image_description: 'The plot displays a 4×4 confusion matrix heatmap for a sentiment
+    analysis classifier with classes: Positive, Neutral, Negative, and Mixed. The
+    matrix uses a sequential blue colormap ranging from very light blue (#f7fbff)
+    for low values to dark blue (#08306b) for high values. The title "confusion-matrix
+    · pygal · pyplots.ai" appears at the top. "True Label" is shown on the y-axis
+    (left side, rotated), "Predicted Label" on the x-axis (bottom). Class labels are
+    displayed on both axes - row labels on the left and column labels rotated 45°
+    at the bottom. Each cell contains the count value with white text on dark cells
+    and dark text on light cells. A colorbar on the right shows the scale from 5 to
+    142 with a "Count" title. The diagonal cells (correct predictions) have a subtle
+    blue border highlighting them. The layout is clean and well-balanced on a square
+    canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable at full size, appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements, rotated labels prevent overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: cells well-sized, values clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: sequential blue colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: good proportions, but some unused space in corners
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"True Label" and "Predicted Label" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: no grid (acceptable for heatmap), but no legend since it's a matrix
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct confusion matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: true labels on y-axis, predicted on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: annotations present, colorbar included, diagonal highlighted
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible, colorbar shows full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: colorbar correctly labeled "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows varying performance across classes, different confusion patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: sentiment analysis is a real-world classification scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: counts are realistic for a test dataset
+    code_quality:
+      score: 5
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: uses custom class (necessary for pygal limitation, but deviates from
+          KISS)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: uses internal pygal.graph.graph.Graph (not public API)
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: saves to plot.svg and plot.png (spec requires plot.png, which is
+          correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: custom SVG generation, pygal styling system, smooth color interpolation
+  verdict: APPROVED
diff --git a/plots/confusion-matrix/metadata/seaborn.yaml b/plots/confusion-matrix/metadata/seaborn.yaml
index d07f2ebbdd..5d337913ff 100644
--- a/plots/confusion-matrix/metadata/seaborn.yaml
+++ b/plots/confusion-matrix/metadata/seaborn.yaml
@@ -23,3 +23,167 @@ review:
   weaknesses:
   - Minor colorbar label duplication (label set both in cbar_kws and manually)
   - Data is deterministic but no explicit np.random.seed() for documentation purposes
+  image_description: 'The plot displays a 4x4 confusion matrix heatmap for sentiment
+    analysis classification with four classes: Negative, Neutral, Positive, and Mixed.
+    The matrix uses a sequential blue colormap ("Blues") where darker blues indicate
+    higher counts. Cell annotations show the raw count values in bold white/black
+    text. The diagonal cells (correct predictions) are prominently darker: Negative=156,
+    Neutral=142, Positive=168, Mixed=125. Off-diagonal cells show misclassification
+    patterns with lighter shading. The y-axis is labeled "True Label", x-axis is "Predicted
+    Label". A colorbar on the right shows the count scale from ~0 to 168. The title
+    reads "Sentiment Analysis Model · confusion-matrix · seaborn · pyplots.ai". White
+    gridlines separate cells clearly. X-axis labels are rotated 45° for readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 22pt, tick labels at 18pt, annotations
+          at 20pt bold - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are square, well-sized, annotations clearly visible with contrasting
+          colors
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square format appropriate for matrix, good proportions but colorbar
+          could be slightly better integrated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"True Label" and "Predicted Label" as specified'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid needed for heatmap, but colorbar label "Count" is duplicated
+          (in cbar_kws and manually set)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct confusion matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: True labels on y-axis, Predicted on x-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Annotations present, Blues colormap, colorbar included, clear axis
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 4 classes visible, full range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly shows count scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{description} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows strong diagonal (good model), asymmetric confusion patterns,
+          varied misclassification rates
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sentiment analysis is a real-world NLP task, 4 classes are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Counts are realistic for a test set (~700 samples total)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed, though data is deterministic (hardcoded matrix)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/altair.yaml b/plots/contour-basic/metadata/altair.yaml
index 0a36e0a293..ee78362714 100644
--- a/plots/contour-basic/metadata/altair.yaml
+++ b/plots/contour-basic/metadata/altair.yaml
@@ -22,3 +22,175 @@ review:
   - Both PNG and HTML outputs generated correctly
   weaknesses:
   - Axis labels could include units or more context
+  image_description: The plot displays a contour visualization of two overlapping
+    Gaussian peaks on a 2D scalar field. The filled background uses the viridis colormap
+    (purple for low values ~0.1 transitioning through green/cyan to yellow for peak
+    values ~1.0). White contour lines are clearly overlaid showing isolines at multiple
+    levels. The primary peak is centered around (1, 1) with bright yellow at its center,
+    while a secondary smaller peak is visible around (-1, -0.5). The title "contour-basic
+    · altair · pyplots.ai" appears at the top center. Axis labels show "X Value" (horizontal)
+    and "Y Value" (vertical) with tick marks from -3.0 to 3.0. A colorbar on the right
+    displays "Z Value" scale from ~0.1 to 1.0. The overall layout is balanced with
+    no overlapping elements.
+  criteria_checklist:
+    visual_quality:
+      score: 39
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour lines clearly visible in white against viridis, filled regions
+          show smooth gradations
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe, white contour lines provide
+          excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content, proper spacing
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("X Value", "Y Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar well-placed on right, no grid needed for contour plots
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly shows isolines (contour lines) of a 2D scalar field
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y coordinates and Z values correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has both filled contour regions AND contour lines, colorbar present
+          per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Z Value"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Two overlapping Gaussian peaks demonstrate gradients, valleys, peaks,
+          and how contours connect points of equal value
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Mathematical function z = f(x, y) is explicitly mentioned in spec
+          as example data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for mathematical demonstration (-3 to 3 range,
+          Z values 0-1)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic results
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (altair, numpy, pandas) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creative implementation using marching squares algorithm for contour
+          lines and rect marks for filled background. Altair doesn't have native contour
+          support, so this demonstrates clever use of available primitives.
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/bokeh.yaml b/plots/contour-basic/metadata/bokeh.yaml
index f2d1a95d47..ba15e7fe20 100644
--- a/plots/contour-basic/metadata/bokeh.yaml
+++ b/plots/contour-basic/metadata/bokeh.yaml
@@ -27,3 +27,176 @@ review:
     regions
   - Axis labels use generic X Coordinate / Y Coordinate instead of a domain-specific
     context
+  image_description: The plot displays a contour visualization of a 2D scalar field
+    with a Gaussian peak function. The main peak is centered around (0, 0) with a
+    secondary, smaller peak visible around (1.5, -1). The filled contours use the
+    Viridis colormap (colorblind-safe) transitioning from dark purple/blue (low values
+    ~0.05) through teal/cyan to green and finally bright yellow (high values ~0.95).
+    Contour lines in dark gray overlay the filled regions. The title "contour-basic
+    · bokeh · pyplots.ai" appears at the top left. X and Y axes are labeled "X Coordinate"
+    and "Y Coordinate" respectively, with tick marks from -3 to 3. A colorbar on the
+    right shows the Z Value scale from ~0.05 to ~0.95. The overall layout is clean
+    with a light gray background and subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour levels are well-defined with smooth transitions and clear
+          boundaries
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis palette is colorblind-safe with excellent contrast progression
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, but colorbar "Z Value" title appears partially
+          cut off at edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Coordinate", "Y Coordinate") but no units
+          provided
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle but barely visible against the dark contour fill
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct contour plot with both filled regions and contour lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y coordinates correctly mapped to grid, Z values to color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar, contour lines, filled regions as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range (-3.2 to 3.2 buffer works well)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents Z value range
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "contour-basic · bokeh · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows primary and secondary peaks demonstrating nested contours;
+          could show more varied topography (saddle points, ridges)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Gaussian peak function is a plausible mathematical scenario; axis
+          labels are generic rather than domain-specific
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 0-1 range is appropriate for the normalized Gaussian function
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → contour generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is deterministic anyway
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh and contourpy APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" (correct) but code structure includes unnecessary
+          top-level function-like organization
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's patches and multi_line glyphs appropriately; uses contourpy
+          for contour generation which is standard. Could leverage more Bokeh-specific
+          features like HoverTool for interactivity in the HTML version.
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/highcharts.yaml b/plots/contour-basic/metadata/highcharts.yaml
index 43ed6e6f06..62590347d0 100644
--- a/plots/contour-basic/metadata/highcharts.yaml
+++ b/plots/contour-basic/metadata/highcharts.yaml
@@ -26,3 +26,190 @@ review:
   - Contour labels cluster in the lower-left peak area, minor readability concern
   - Code uses helper functions which deviates from KISS structure (though justified
     for algorithm complexity)
+  image_description: 'The plot displays a contour visualization using a heatmap background
+    with overlaid white contour lines and black shadow outlines for visibility. The
+    colormap is viridis (purple to yellow gradient), showing three distinct regions:
+    a bright yellow high-value peak in the upper-right (~90%), a yellow-green peak
+    in the lower-left (~80%), and a dark purple low-value region in the upper-center
+    (~10%). White contour lines clearly delineate iso-levels from 10% to 90% in 10%
+    increments. Each contour level is labeled with a white rectangular annotation
+    showing the percentage value. The title reads "contour-basic · highcharts · pyplots.ai"
+    at the top. X-axis is labeled "X Position (units)" and Y-axis is labeled "Y Position
+    (units)", both ranging from approximately -3.0 to 3.0. A vertical colorbar on
+    the right shows "Intensity (%)" from 0% to 100%.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and contour labels are all clearly readable.
+          Font sizes are appropriate for the 4800x2700 resolution.
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Contour labels are well-placed along the lines. Minor clustering
+          of labels in the lower-left peak (30%-80% labels stack vertically), but
+          still readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are properly sized for the 100x100 grid. Contour lines
+          with shadow effect provide excellent visibility against the varying background
+          colors.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe. White contour lines with black
+          shadow ensure visibility across all color values.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, no clipping. Colorbar is well-positioned on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "X Position (units)", "Y Position
+          (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are barely visible/not shown on the heatmap. The dotted
+          grid configured in code is not apparent in the final image.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct contour plot type with both filled regions (heatmap) and
+          contour lines as specified.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly mapped. Grid coordinates properly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colorbar, contour lines, filled regions, and labeled levels.
+          The specification suggests "Consider using both contour lines and filled
+          regions for clarity" - this is done well. However, no diverging colormap
+          option shown (only viridis which is sequential).
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes span the full -3 to 3 range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar legend correctly shows "Intensity (%)" with proper scale.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple peaks (high values), a valley (low value region),
+          and gradients between them. Demonstrates the key features of contour plots
+          well. Could show a saddle point for completeness.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Uses a mathematical Gaussian combination that simulates "temperature
+          distribution" as mentioned in comments. Plausible scientific scenario but
+          generic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid of 100x100 points provides smooth contours. Value range 0-100%
+          is sensible for normalized intensity.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code uses functions (marching_squares_contour, connect_segments)
+          rather than flat script structure. While these are necessary for the contour
+          extraction algorithm, it deviates from the KISS principle.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set, ensuring reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts heatmap module with annotations API for
+          contour labels. Custom marching squares algorithm to generate contour lines
+          as overlay series. Shadow effect on contour lines for visibility. Interactive
+          HTML output alongside PNG.
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/letsplot.yaml b/plots/contour-basic/metadata/letsplot.yaml
index df82f90ab1..e8a679bd27 100644
--- a/plots/contour-basic/metadata/letsplot.yaml
+++ b/plots/contour-basic/metadata/letsplot.yaml
@@ -26,3 +26,181 @@ review:
   - The data shows a simple two-peak scenario; more complex features like saddle points
     could better demonstrate contour capabilities
   - Grid lines are absent due to theme_minimal; subtle grid could aid value estimation
+  image_description: The plot displays a filled contour visualization of two Gaussian
+    peaks on a 2D plane. The primary peak is located at approximately (1, 1) shown
+    in bright yellow/green colors indicating higher values (~1.0), and a secondary
+    smaller peak at approximately (-1, -1) shown in cyan/green indicating moderate
+    values (~0.7). The plot uses the viridis colormap transitioning from dark purple
+    (low values ~0) through blue, teal, green to yellow (high values). White contour
+    lines overlay the filled regions for added clarity. The X-axis is labeled "X Coordinate"
+    ranging from -2.5 to 3, and the Y-axis is labeled "Y Coordinate" ranging from
+    -2.5 to 3. The title reads "contour-basic · letsplot · pyplots.ai" and a colorbar
+    legend labeled "Value" appears on the right showing the scale from 0 to 0.8+.
+    The overall layout is clean with a minimal theme and good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend text are all clearly
+          readable at the output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour levels are well-defined with good contrast between filled
+          regions; white contour lines add clarity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe and provides excellent perceptual
+          uniformity
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions overall, though the plot area could use slightly
+          more padding on the left
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Coordinate", "Y Coordinate") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed; minimal theme removes grid which is acceptable
+          but subtle grid lines could enhance readability
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct contour plot with both filled regions and contour lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z data correctly mapped to contour visualization
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: colorbar present, both contour lines
+          and filled regions, appropriate colormap'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible, axes show complete surface
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Value" with accurate scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows two Gaussian peaks demonstrating contour levels and gradients;
+          could show more variation like saddle points or ridges
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function is a valid demonstration; labeled as generic
+          coordinates rather than a domain-specific application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (0-1) and coordinate ranges (-3 to 3) are sensible for a mathematical
+          demonstration
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set (though not strictly needed for deterministic
+          data)'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar with geom_contourf and geom_contour, scale_fill_viridis,
+          and theme_minimal. Good use of lets-plot's grammar of graphics approach,
+          but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/matplotlib.yaml b/plots/contour-basic/metadata/matplotlib.yaml
index a8969b8ef9..c96483d131 100644
--- a/plots/contour-basic/metadata/matplotlib.yaml
+++ b/plots/contour-basic/metadata/matplotlib.yaml
@@ -28,3 +28,182 @@ review:
     adjusting the color scale or level range
   - Some contour labels appear in multiple locations which could be reduced for cleaner
     presentation
+  image_description: 'The plot displays a contour visualization of a simulated mountain
+    terrain. It shows a 10km x 10km area with two distinct peaks: a main peak in the
+    northeast (around coordinates 7,7) reaching approximately 1000m elevation (shown
+    in bright yellow-green), and a secondary peak in the southwest (around coordinates
+    2,3) reaching about 600m (shown in cyan-green). A valley depression runs through
+    the center of the map (shown in white/light colors at around 200-300m elevation).
+    The viridis colormap transitions from dark purple (low elevations ~200m) through
+    blue, green, to yellow (high elevations ~1000m). White contour lines are overlaid
+    at 50m intervals, with thicker major contour lines every 200m labeled with elevation
+    values (400m, 600m, 800m). A colorbar on the right shows the elevation scale.
+    Axis labels indicate "Distance East (km)" and "Distance North (km)" with clear
+    tick marks. The title reads "Mountain Terrain · contour-basic · matplotlib · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; contour labels are well-placed and
+          readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Filled contours with appropriate alpha (0.9), contour lines visible
+          but not overwhelming
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe and provides excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with square aspect ratio; slight asymmetry with colorbar
+          but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Distance East (km)", "Distance North
+          (km)", "Elevation (m)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid present (acceptable for contour plot) and no separate legend.
+          The colorbar serves as the legend, but the plot could benefit from annotations
+          for peaks.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct contour plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid correctly mapped, Z values properly displayed as contours
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: filled regions (contourf), contour lines,
+          labeled levels, colorbar'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range displayed (0-10 km on both axes, 200-1000m elevation)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows elevation scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Mountain Terrain · contour-basic · matplotlib ·
+          pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows peaks, valley, gradients, and multiple terrain features; could
+          show more extreme variations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Mountain terrain with realistic topographic features (peaks, ridges,
+          valleys)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Elevation range 200-1000m is realistic; 10km area is sensible scale
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` (though not strictly needed since data
+          is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses contourf, contour, clabel effectively; could leverage more advanced
+          features like custom contour levels styling or hatching
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/plotly.yaml b/plots/contour-basic/metadata/plotly.yaml
index c4a5139ebb..e77fd9fe90 100644
--- a/plots/contour-basic/metadata/plotly.yaml
+++ b/plots/contour-basic/metadata/plotly.yaml
@@ -25,3 +25,177 @@ review:
     (units)" for a realistic scenario)
   - Data context is purely mathematical rather than tied to a real-world application
     from the spec examples
+  image_description: 'The plot displays a filled contour visualization of a mathematical
+    surface (peaks-like function) using the Viridis colormap. The plot shows multiple
+    regions: a prominent yellow peak (maximum ~8) in the upper-center region around
+    coordinates (0, 1.5), several teal/cyan mid-value regions, a dark blue minor depression
+    around (-2, 0.5), and a deep purple/magenta minimum (~-6) in the lower-center
+    area around (0, -2). The colorbar on the right shows the Z Value scale ranging
+    from approximately -6 to 8. Contour lines are clearly drawn with white labels
+    showing the numeric values at each level. The title "contour-basic · plotly ·
+    pyplots.ai" appears centered at the top. Axis labels show "X Coordinate" and "Y
+    Coordinate" with tick marks from -3 to 3 on both axes.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt, contour labels
+          at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, contour labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Filled contour regions clearly distinguishable, contour lines visible
+          with proper width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe with excellent perceptual uniformity
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with adequate margins, colorbar well positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Coordinate", "Y Coordinate") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (appropriate for contour), colorbar serves as legend but
+          could use better title positioning
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct contour plot with filled regions and lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly assigned to grid coordinates and surface values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has filled regions, contour lines, colorbar, and labeled levels as
+          spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range from -3 to 3
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents Z values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both peaks and valleys, multiple local extrema, gradients,
+          and saddle-like regions - demonstrates full range of contour plot capabilities
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Mathematical function (peaks-like) is plausible for a basic contour
+          demo but generic; could represent elevation or any 2D field
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are sensible for mathematical function; 50x50 grid provides
+          smooth contours within spec range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions or classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Contour with showlabels, colorbar customization, and HTML
+          export for interactivity; could leverage more Plotly-specific features like
+          hover info customization or animation
+  verdict: APPROVED
diff --git a/plots/contour-basic/metadata/plotnine.yaml b/plots/contour-basic/metadata/plotnine.yaml
index 20c06c58cc..4c8573c851 100644
--- a/plots/contour-basic/metadata/plotnine.yaml
+++ b/plots/contour-basic/metadata/plotnine.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/altair.yaml b/plots/contour-decision-boundary/metadata/altair.yaml
index 2dbf537f19..68f845b383 100644
--- a/plots/contour-decision-boundary/metadata/altair.yaml
+++ b/plots/contour-decision-boundary/metadata/altair.yaml
@@ -27,3 +27,181 @@ review:
   - Axis labels lack units (could use Feature X1 normalized or similar)
   - The shape legend uses a workaround with invisible points which works but is not
     idiomatic Altair
+  image_description: 'The plot displays a KNN decision boundary visualization for
+    a two-moon classification dataset. The background shows two colored regions: blue
+    (#306998) for Class A and yellow (#FFD43B) for Class B, created using rect marks
+    with 0.4 opacity. Training points are overlaid as circles with dark strokes, where
+    correctly classified points appear as circles and misclassified points as triangles
+    with red strokes. The decision boundary clearly shows the non-linear separation
+    between the two crescent-shaped classes. The title "contour-decision-boundary
+    · altair · pyplots.ai" is prominently displayed at the top. Axes are labeled "Feature
+    X1" and "Feature X2" with clear tick marks. A comprehensive legend on the right
+    shows True Class (Class A/B), Classification status (Correct/Incorrect), and Decision
+    Region colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (250-350) with good stroke visibility. Slight
+          deduction as some points in dense areas could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with legend on right, slight imbalance with legend taking
+          up space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Feature X1" and "Feature X2" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at 0.3 opacity is subtle, legend is comprehensive but has three
+          separate sections which is slightly complex
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary visualization with classifier regions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1/X2 correctly mapped to features
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has decision regions, training points, class markers, misclassification
+          highlighting. Missing explicit contour lines (uses rect binning instead)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all elements
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but missing middle dot character (uses regular
+          dot)
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two-moon dataset with both classes, correct and incorrect classifications,
+          good boundary complexity. Could show more misclassified points for educational
+          value
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic ML dataset (make_moons) is a standard, appropriate choice
+          for demonstrating decision boundaries
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values in reasonable range (-1.5 to 2.5), typical for standardized
+          features
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (altair, numpy, pandas, sklearn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair 6.0 syntax
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Missing strict=True is actually present (line 43), but output format
+          is correct
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of layered charts, mark_rect for decision regions, mark_point
+          with shapes, tooltips for interactivity, proper encoding types. Could leverage
+          more Altair-specific features like selections
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/bokeh.yaml b/plots/contour-decision-boundary/metadata/bokeh.yaml
index 1dc89a39a8..59f9c047f8 100644
--- a/plots/contour-decision-boundary/metadata/bokeh.yaml
+++ b/plots/contour-decision-boundary/metadata/bokeh.yaml
@@ -27,3 +27,184 @@ review:
     from the main chart
   - The Misclassified legend entry glyph could be more prominent to match the visual
     weight of the X markers in the plot
+  image_description: 'The plot displays a decision boundary visualization for a KNN
+    classifier on make_moons synthetic data. The background shows two distinct colored
+    regions: a light blue region (Class 0 decision area) in the upper-left and a light
+    cream/yellow region (Class 1 decision area) in the lower-right, with a curved
+    boundary between them following the characteristic moon shapes. Blue circular
+    markers represent Class 0 training points, and yellow circular markers represent
+    Class 1 training points. Red "X" markers clearly indicate misclassified points
+    scattered near the decision boundary. The title "contour-decision-boundary · bokeh
+    · pyplots.ai" appears at the top left. Axis labels show "Feature 1" (x-axis) and
+    "Feature 2" (y-axis). A legend on the right side shows "Class 0" and "Class 1"
+    entries. The grid is subtle with dashed lines. The overall layout is clean with
+    good use of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable at 36pt/28pt/22pt
+          sizes. Slightly generous sizing but effective.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (25px) with good alpha (0.9). Misclassified
+          X markers are clearly visible. Decision boundary regions have appropriate
+          transparency (0.5).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe. Red X markers for misclassified points add clear distinction.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with plot filling most of the space. Legend
+          placement on right is clean. Minor gap between plot and legend.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Feature 1" and "Feature 2" but no units (acceptable
+          for synthetic ML data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid with alpha 0.3, legend well-placed with appropriate
+          sizing
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary visualization with colored regions and
+          overlaid training points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1/X2 features correctly mapped to axes, class labels correctly shown
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colored decision regions, training points
+          overlay, misclassified point markers, legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range with appropriate padding (±0.5)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Class 0, Class 1, and Misclassified entries
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but missing "Misclassified" in legend when viewed
+          (legend shows it but glyph is small)
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both classes well, includes misclassified points demonstrating
+          classifier limitations. Could show more boundary complexity.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: make_moons is a standard ML benchmark dataset, appropriate for demonstrating
+          decision boundaries
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 200 samples with noise=0.25 is reasonable. KNN with k=15 provides
+          smooth boundaries.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, HoverTool for interactivity, LinearColorMapper
+          for decision regions, custom Legend construction. Could leverage more Bokeh-specific
+          features.
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/highcharts.yaml b/plots/contour-decision-boundary/metadata/highcharts.yaml
index 95cd6d2ed0..99c2b70629 100644
--- a/plots/contour-decision-boundary/metadata/highcharts.yaml
+++ b/plots/contour-decision-boundary/metadata/highcharts.yaml
@@ -27,3 +27,185 @@ review:
     visible)
   - Legend box with Training Data title and class names is not visible in the rendered
     image
+  image_description: 'The plot shows a decision boundary visualization for a KNN classifier
+    on moon-shaped data. The background is a heatmap with blue (#306998) regions on
+    the left side (Class 0 prediction probability) transitioning through a light gray/white
+    gradient in the middle to yellow (#FFD43B) regions on the right side (Class 1
+    prediction probability). Training data points are overlaid as scatter markers:
+    blue circles for Class 0 and yellow diamonds for Class 1. The title "contour-decision-boundary
+    · highcharts · pyplots.ai" appears at the top in bold, with a subtitle "KNN Classifier
+    Decision Boundary on Moon-shaped Data" below it. The Y-axis is labeled "Feature
+    X2" with tick values from -1.5 to 2.5. The X-axis shows tick values from -1.5
+    to 2.5. A small color axis legend appears on the right showing the 0.0 to 1.0
+    probability scale. The overall layout fills the canvas well with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, and axis tick labels are clearly readable at the
+          high resolution; Y-axis label is visible but X-axis label is missing
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all data points and labels are clearly
+          separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are well-sized with dark borders making them stand out against
+          the heatmap background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue and yellow palette, avoiding problematic
+          red-green combinations
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization (~60% filled by plot), minor issue with missing
+          x-axis label
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Feature X2" label but X-axis label "Feature X1" is not
+          visible
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid lines present; color axis legend is small but functional;
+          main legend not visible
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary visualization with heatmap regions and
+          overlaid scatter points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1 and X2 features correctly mapped to axes, class predictions to
+          color
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colored regions, overlaid training points, different marker shapes;
+          missing visible legend box
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Color axis shows probability scale; class legend not visible in rendered
+          output
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "contour-decision-boundary · highcharts ·
+          pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows moon-shaped classification with clear boundary separation;
+          some misclassified points visible demonstrating model behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic machine learning example using make_moons dataset, commonly
+          used for demonstrating classification algorithms
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Appropriate feature ranges for synthetic data; 150 samples is reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → classifier → heatmap →
+          scatter → HTML → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Both np.random.seed(42) and random_state=42 set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (json, tempfile, time, urllib, Path, numpy,
+          selenium, sklearn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs used throughout
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap module with scatter overlay; builds JSON
+          options directly; good use of colorAxis for probability gradient
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/letsplot.yaml b/plots/contour-decision-boundary/metadata/letsplot.yaml
index df22f358b0..d4c3bfd0b1 100644
--- a/plots/contour-decision-boundary/metadata/letsplot.yaml
+++ b/plots/contour-decision-boundary/metadata/letsplot.yaml
@@ -27,3 +27,178 @@ review:
     be consolidated
   - Axis labels lack meaningful domain context (just Feature X1/X2 since using synthetic
     data)
+  image_description: 'The plot displays a decision boundary classifier visualization
+    using the make_moons dataset. The feature space is divided into two regions: a
+    light blue region (predicted class 0) primarily covering the left and upper portions,
+    and a light yellow/gold region (predicted class 1) covering the right and lower
+    portions. The curved decision boundary between regions clearly shows the KNN classifier''s
+    ability to separate the two moon-shaped clusters. Training data points are overlaid
+    as circles (correctly classified) and X markers (incorrectly classified). Blue
+    circles/X marks represent true class 0, yellow circles/X marks represent true
+    class 1. Three separate legends on the right side explain: Predicted Class, True
+    Class, and Classification status. The title "contour-decision-boundary · letsplot
+    · pyplots.ai" appears at top. Axis labels show "Feature X1" (x-axis) and "Feature
+    X2" (y-axis).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=5) with appropriate visibility, though
+          some overlap near the decision boundary
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe, good contrast between
+          regions and points
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout, but three separate legends take up significant
+          space on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Feature X1", "Feature X2") but no units (though
+          not applicable for synthetic features)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean minimal theme, legends are well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary visualization with filled regions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1 and X2 correctly mapped to feature dimensions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colored regions, overlaid training points,
+          marker styles for correct/incorrect, legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legends accurately describe classes and classification status
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-decision-boundary · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both classes, decision boundary, correct and incorrect classifications;
+          make_moons is a standard ML dataset
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Make_moons is a well-known synthetic classification dataset; plausible
+          ML demonstration scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Appropriate feature range for normalized synthetic data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but includes unnecessary plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_tile, geom_point, scale_*_manual), theme_minimal;
+          could leverage more lets-plot specific interactive features
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/matplotlib.yaml b/plots/contour-decision-boundary/metadata/matplotlib.yaml
index aa1e04e0ed..180005df93 100644
--- a/plots/contour-decision-boundary/metadata/matplotlib.yaml
+++ b/plots/contour-decision-boundary/metadata/matplotlib.yaml
@@ -28,3 +28,188 @@ review:
     - consider using a perceptually uniform colormap
   - Could leverage SVM decision_function to show prediction confidence as gradient
     rather than hard boundary regions
+  image_description: 'The plot displays a decision boundary visualization for a two-moon
+    classification dataset. The canvas is divided into two colored regions: a light
+    blue region (Class A prediction area) and a light coral/salmon region (Class B
+    prediction area). A curved white line marks the decision boundary between classes.
+    Training data points are overlaid as markers: blue solid circles for correctly
+    classified Class A samples, yellow/gold circles with black edges for correctly
+    classified Class B samples. Misclassified points from both classes are shown with
+    red X markers. The title "contour-decision-boundary · matplotlib · pyplots.ai"
+    appears at the top. Axis labels show "Feature X1" and "Feature X2". A legend in
+    the upper-left corner identifies all four marker types. The characteristic two-moon
+    shape is clearly visible in the data distribution, with the SVM boundary curving
+    appropriately to separate the interlocking crescents.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, legend at
+          14pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-positioned in upper left
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers sized at s=150/200 appropriate for 200 data points, good
+          alpha=0.9, white/black edges provide good contrast. Minor: some overlap
+          in dense areas near boundary'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide good contrast and are
+          distinguishable for most colorblind types. Coolwarm colormap is acceptable
+          but not ideal for colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, 16:9 aspect ratio utilized
+          effectively
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Feature X1", "Feature X2") but lack units
+          (expected for synthetic ML data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed with good framealpha. Grid appears very faint
+          or not visible despite code setting alpha=0.3
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary contour visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1 and X2 features correctly mapped to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: colored regions, overlaid training points,
+          different markers for correct/incorrect, legend present, trained classifier
+          used'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding (±0.5)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately describes all four point categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: `{spec-id} · {library} · pyplots.ai`'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows decision boundary, both classes, correct and misclassified
+          points. The noisy two-moon data demonstrates non-linear boundary capability
+          well. Minor: could benefit from more varied misclassification patterns'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic ML synthetic dataset (make_moons) widely used in tutorials
+          and research
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Synthetic data with appropriate noise level (0.25) creates realistic
+          classification challenge. Values are in reasonable range for standardized
+          features
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → train → plot → save structure, no unnecessary
+          functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Both `np.random.seed(42)` and `random_state=42` ensure reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: matplotlib, numpy, sklearn.datasets, sklearn.svm'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern matplotlib API, axes methods used correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib's contourf and contour for decision regions and boundary
+          line. Uses scatter with marker differentiation. However, could leverage
+          more advanced features like decision_function for probability shading or
+          support vector highlighting
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/plotly.yaml b/plots/contour-decision-boundary/metadata/plotly.yaml
index 2bf8b8b2cc..09d29a7f3f 100644
--- a/plots/contour-decision-boundary/metadata/plotly.yaml
+++ b/plots/contour-decision-boundary/metadata/plotly.yaml
@@ -23,3 +23,177 @@ review:
   - Legend position in upper-left overlaps slightly with some Class 0 data points
   - Could enhance interactivity by adding hover info to the contour regions showing
     probability values
+  image_description: The plot displays a decision boundary visualization for a KNN
+    classifier trained on moon-shaped data. The background shows a continuous gradient
+    from blue (Class 0 probability) to yellow (Class 1 probability), with a dashed
+    yellow/orange decision boundary line at the 0.5 probability threshold. Blue circular
+    markers represent Class 0 data points (clustered in upper-left region), while
+    yellow diamond markers represent Class 1 data points (clustered in lower-right
+    region). The title reads "contour-decision-boundary · plotly · pyplots.ai" at
+    the top. Axis labels show "Feature 1 (Standardized)" and "Feature 2 (Standardized)".
+    A colorbar on the right indicates "Class Probability" from 0 to 1. The legend
+    in the upper-left shows Class 0 (circle) and Class 1 (diamond).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and colorbar text all clearly readable
+          at proper sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized with good contrast; white/black borders help visibility
+          against background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow palette is colorblind-friendly; good contrast between
+          classes
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot fills appropriate area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Feature 1 (Standardized)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle but legend overlaps slightly with data points in upper-left
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary contour visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1/X2 features correctly mapped to axes, class labels shown via markers
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colored regions, training points overlay, legend, decision boundary
+          line
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Class 0 and Class 1 with matching markers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "{spec-id} · {library} · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows curved decision boundary, probability gradient, and overlapping
+          classes well; could show some misclassified points more prominently
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Synthetic make_moons data is standard ML example, but context is
+          generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standardized values are sensible for ML features
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed seed (np.random.seed(42) and random_state=42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, plotly.graph_objects, sklearn utilities)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: Saves both plot.png and plot.html (fine, but code style could
+          be cleaner)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of go.Contour for probability gradient and decision boundary
+          line, but could leverage more Plotly interactivity features like hover customization
+          for the contour regions
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/plotnine.yaml b/plots/contour-decision-boundary/metadata/plotnine.yaml
index b8a607bbe9..eb902fd051 100644
--- a/plots/contour-decision-boundary/metadata/plotnine.yaml
+++ b/plots/contour-decision-boundary/metadata/plotnine.yaml
@@ -28,3 +28,180 @@ review:
   - Legend could be more compact to allow more space for the plot area
   - Grid lines completely removed - subtle grid could help with reading exact coordinate
     values
+  image_description: 'The plot displays a decision boundary visualization using the
+    classic two-moon synthetic dataset. The feature space is divided into two distinct
+    regions: a blue region (class 0, #306998) covering the upper-left area and a yellow
+    region (class 1, #FFD43B) covering the lower-right area. The boundary between
+    regions shows a smooth, nonlinear curve characteristic of an SVM with RBF kernel.
+    Training points are overlaid as dark blue circles (class 0) and olive/gold circles
+    (class 1) for correctly classified points, while misclassified points are displayed
+    with X markers. The title "contour-decision-boundary · plotnine · pyplots.ai"
+    is centered at the top. Axis labels are "Feature X1" (x-axis) and "Feature X2"
+    (y-axis). A two-section legend on the right shows "Predicted Region" (0/1) and
+    "Training Points" (Class 0, Class 1, Class 0 misclassified, Class 1 misclassified).
+    The plot uses a minimal theme with no grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt, legend text appropriately
+          sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized and visible, alpha on regions appropriate; slight
+          deduction as some points near boundary overlap slightly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast between
+          classes
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with 16:9 ratio, legend placement is good
+          but takes some horizontal space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Feature X1/X2" but no units (appropriate for
+          synthetic data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid removed (acceptable), legend is well-organized but could be
+          slightly more compact
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary visualization with filled regions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1/X2 features correctly mapped to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colored regions, training points overlay, misclassification markers,
+          legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full extent of data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend clearly shows predicted regions and training point classifications
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows decision boundary, class regions, training points, and misclassifications;
+          could show more varied classifier behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: make_moons is a standard ML benchmark, synthetic but appropriate
+          for demonstrating decision boundaries
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values in reasonable range (-2 to 3)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → model → mesh → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) and random_state=42 used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine and sklearn APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar (ggplot + geom_tile + geom_point), scale_*_manual,
+          theme customization. However, no use of more advanced plotnine features
+          like faceting or statistical transformations
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/pygal.yaml b/plots/contour-decision-boundary/metadata/pygal.yaml
index 86e9a460ba..086a11c5b4 100644
--- a/plots/contour-decision-boundary/metadata/pygal.yaml
+++ b/plots/contour-decision-boundary/metadata/pygal.yaml
@@ -26,3 +26,180 @@ review:
     for coordinate mapping
   - Axis labels lack units (acceptable for synthetic ML data, but Feature 1 normalized
     would be better)
+  image_description: 'The plot displays a decision boundary visualization with two
+    moon-shaped class regions. The background is divided into two color zones: light
+    blue (Class 0) and light yellow (Class 1), with a curved boundary between them
+    following the characteristic moon pattern from sklearn''s make_moons dataset.
+    Training points are overlaid as solid circles - blue circles for Class 0 and yellow
+    circles for Class 1. Misclassified points are marked with red X symbols on top
+    of their class-colored circles. The title "contour-decision-boundary · pygal ·
+    pyplots.ai" appears at the top. Axis labels show "Feature 1" (x-axis) and "Feature
+    2" (y-axis) with clear tick marks ranging from approximately -1.9 to 2.9 on x-axis
+    and -1.4 to 2.0 on y-axis. A legend in the top-right corner shows Class 0, Class
+    1, and Misclassified markers.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend are all clearly readable
+          at the target resolution. Slightly smaller legend text.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized for the data density (~150 points), misclassified
+          markers clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, good contrast between classes
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend placed appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Feature 1" and "Feature 2" are descriptive but lack units (acceptable
+          for synthetic ML data)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (acceptable for this plot type), legend well placed but could
+          be cleaner
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary visualization with colored regions and
+          overlaid points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1/X2 correctly mapped to axes, class labels correctly colored
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Decision regions, training points, misclassified point markers, legend
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Class 0, Class 1, and Misclassified
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "contour-decision-boundary · pygal · pyplots.ai"
+          but title font could be slightly larger
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both classes, decision boundary, and misclassified points well.
+          Good representation of the moon-shaped classification problem.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses sklearn's make_moons, a standard ML teaching dataset, with SVM
+          classifier - excellent educational context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Appropriate feature ranges for synthetic normalized data
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure with helper function for coordinate mapping
+          (acceptable)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Outputs plot.png and plot.svg, but also plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of pygal's XY chart as a base with custom SVG injection
+          to create decision boundary regions. This is an excellent workaround for
+          pygal's limited contour capabilities.
+  verdict: APPROVED
diff --git a/plots/contour-decision-boundary/metadata/seaborn.yaml b/plots/contour-decision-boundary/metadata/seaborn.yaml
index eb343afa31..117bbf59f6 100644
--- a/plots/contour-decision-boundary/metadata/seaborn.yaml
+++ b/plots/contour-decision-boundary/metadata/seaborn.yaml
@@ -24,3 +24,168 @@ review:
   weaknesses:
   - Axis labels lack units (Feature X1/X2 rather than describing the specific feature
     dimensions)
+  image_description: 'The plot shows a decision boundary visualization for a 2-class
+    SVM classifier trained on make_moons data. The feature space is divided into two
+    colored regions: a blue region (Class 0) on the left/top portion and a yellow
+    region (Class 1) on the right/bottom portion. A dark blue curved line marks the
+    decision boundary between the two classes. Training points are overlaid as circular
+    markers - blue circles for Class 0 and yellow circles for Class 1. Misclassified
+    points are shown with red X markers with red edges, clustered primarily near the
+    decision boundary where the two moon shapes overlap. The title "contour-decision-boundary
+    · seaborn · pyplots.ai" appears at the top, axis labels show "Feature X1" and
+    "Feature X2", and an "SVM Accuracy: 94.0%" box is displayed in the upper-left
+    corner. A comprehensive legend in the upper-right explains all visual elements.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers well-sized (s=200/250), appropriate alpha (0.9) for 200 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend near data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but without units ("Feature X1" rather than specific
+          feature names with units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Whitegrid style appropriate, legend well-placed and comprehensive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct decision boundary contour plot with classifier regions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X1/X2 features correctly mapped to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colored regions, decision boundary line,
+          training points overlay, legend with class labels, different markers for
+          correct vs incorrect classifications'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding (±0.5)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly explains all visual elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both classes, decision boundary, correctly and incorrectly
+          classified points, demonstrates non-linear SVM boundary capability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: make_moons is a standard ML benchmark dataset, neutral and appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values in reasonable range (-2 to 2.5), noise level (0.25)
+          creates realistic classification challenge
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → train → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 in make_moons
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: matplotlib, numpy, seaborn, sklearn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/bokeh.yaml b/plots/contour-density/metadata/bokeh.yaml
index 85695603b5..d534426865 100644
--- a/plots/contour-density/metadata/bokeh.yaml
+++ b/plots/contour-density/metadata/bokeh.yaml
@@ -23,3 +23,182 @@ review:
   - The legend text "Data points" is minimal and could be more descriptive
   - Relies heavily on matplotlib for contour extraction rather than using a more Bokeh-native
     solution
+  image_description: The plot displays a density contour plot with two distinct bivariate
+    clusters. The contours are rendered in a blue gradient from light blue (outer,
+    lower density) to dark blue/black (inner, higher density). There are 8 contour
+    levels visible, clearly showing two separate density peaks - one centered around
+    (25, 35) and another around (40, 50). Yellow/gold scatter points are overlaid
+    showing the actual data distribution with appropriate transparency. The title
+    "contour-density · bokeh · pyplots.ai" appears in the top left. X-axis is labeled
+    "Measurement A (units)" and Y-axis is labeled "Measurement B (units)". A legend
+    for "Data points" appears in the top left. The plot has a clean white background
+    with subtle dashed grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size with appropriate font sizes (28pt title, 22pt axis labels, 18pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Contour lines are clearly visible with good line width, scatter points
+          visible but could be slightly larger for 500 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue gradient for contours is colorblind-safe, yellow points provide
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot is well-centered, slight imbalance
+          with legend position
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Measurement A (units)" and "Measurement
+          B (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle (alpha 0.3, dashed), but legend is very
+          sparse (just "Data points") and positioned in a way that overlaps slightly
+          with the title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density contour plot using KDE
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y variables correctly assigned and displayed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows KDE contours, multiple density levels, scatter overlay for
+          context as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels the scatter points
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-density · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows two distinct clusters demonstrating density variation, contour
+          gradients clearly visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Generic "Measurement A/B" context is plausible but not specifically
+          compelling
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible ranges (10-60 units), appropriate for generic
+          measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib for contour extraction, numpy, bokeh,
+          scipy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: matplotlib imported but only used for contour line extraction,
+          not plotting'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and export_png correctly, but relies on matplotlib
+          for the heavy lifting (contour extraction). The Bokeh-native approach would
+          be more distinctive.
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/highcharts.yaml b/plots/contour-density/metadata/highcharts.yaml
index a64b1ab2a3..5d587b7f42 100644
--- a/plots/contour-density/metadata/highcharts.yaml
+++ b/plots/contour-density/metadata/highcharts.yaml
@@ -25,3 +25,182 @@ review:
   - Grid lines could be more subtle (currently using Dot style which is adequate but
     slightly prominent)
   - Scatter points are somewhat small relative to the contour line thickness
+  image_description: "The plot displays a density contour visualization with three\
+    \ distinct clusters of data points. The title reads \"contour-density · highcharts\
+    \ · pyplots.ai\" at the top. The X-axis is labeled \"Measurement A (normalized\
+    \ units)\" ranging from approximately -5 to 5.75, and the Y-axis is labeled \"\
+    Measurement B (normalized units)\" ranging from approximately -6 to 7.5. \n\n\
+    Three clusters are clearly visible: a high-density cluster in the upper-right\
+    \ area (around x=2.5, y=3), a medium-density cluster in the left-center area (around\
+    \ x=-1.5, y=1), and a lower-density cluster in the bottom-center area (around\
+    \ x=0.5, y=-1.5). The contours use a viridis-inspired colorblind-safe color scheme\
+    \ progressing from dark purple (10%) through teal (50%) to yellow (90%) for the\
+    \ highest density regions. Semi-transparent blue scatter points are overlaid on\
+    \ the contours. A legend on the right shows \"Density Scale\" with entries for\
+    \ Data Points and density levels (90%, 70%, 50%, 30%, 10%). Contour level annotations\
+    \ (20%, 50%, 80%) appear directly on the plot."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are clearly readable at the high
+          resolution. Font sizes are well-scaled.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; contour labels and legend are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Contour lines are thick and visible; scatter points could be slightly
+          more prominent but are adequate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent viridis-inspired colorblind-safe palette from purple through
+          teal to yellow
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot area is well-proportioned with appropriate
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Measurement A (normalized units)",
+          "Measurement B (normalized units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-positioned and informative; grid could be slightly
+          more subtle
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements density contour plot with KDE-based contours
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y variables correctly mapped as continuous bivariate data
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Contour lines, density gradient levels, scatter overlay all present;
+          minor: filled contours mentioned in spec as alternative'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points and contours fully visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows density scale and data points
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-density · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three clusters with different densities demonstrating the contour
+          visualization well; could show more variation in cluster shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic scientific measurement scenario is plausible and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized units with sensible ranges for clustered data
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally linear flow but includes complex inline marching squares
+          algorithm which is acceptable for this use case
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible results
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves to plot.png but also creates plot.html (minor)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts features including annotations for contour
+          labels, line series for contours, scatter series with tooltips, and proper
+          legend configuration
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/letsplot.yaml b/plots/contour-density/metadata/letsplot.yaml
index 259809a6e3..f5912bfa93 100644
--- a/plots/contour-density/metadata/letsplot.yaml
+++ b/plots/contour-density/metadata/letsplot.yaml
@@ -24,3 +24,172 @@ review:
   weaknesses:
   - Scatter point alpha could be slightly lower (0.2) for better contour visibility
     in dense areas
+  image_description: 'The plot displays a density contour visualization on a light
+    gray background with subtle grid lines. Three distinct clusters are visible through
+    concentric blue contour lines (#306998 color): one in the upper-left area centered
+    around ~15°C/80% humidity (small, dense cluster), one in the lower-center area
+    around ~22°C/55% humidity (large, dense cluster with many contour levels), and
+    one on the right side around ~32°C/75% humidity (more diffuse cluster). Yellow-gold
+    scatter points (#FFD43B) with low alpha are overlaid showing the underlying data
+    distribution. The x-axis shows "Temperature (°C)" ranging from ~8 to 46, and the
+    y-axis shows "Relative Humidity (%)" ranging from ~35 to 100. The title "contour-density
+    · letsplot · pyplots.ai" appears at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Contour lines are clearly visible; scatter points slightly compete
+          with contours in dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Relative Humidity (%)" include units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Subtle grid present; no legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density contour plot with geom_density2d
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to temperature/humidity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: KDE contours, scatter overlay for context as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; plot is self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "contour-density · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple clusters with different densities and spreads (dense
+          vs diffuse)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature/humidity is a real-world scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic though humidity reaching 100% is at the physical
+          limit
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses geom_density2d (ggplot2 grammar), proper ggsize scaling, and
+          theme customization
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/matplotlib.yaml b/plots/contour-density/metadata/matplotlib.yaml
index 9668aa755e..bf00da1c5e 100644
--- a/plots/contour-density/metadata/matplotlib.yaml
+++ b/plots/contour-density/metadata/matplotlib.yaml
@@ -24,3 +24,174 @@ review:
     with units
   - Does not use matplotlib contour label feature (ax.clabel) which would add value
   - Scatter point size (s=30) is on the smaller side for overlay context
+  image_description: 'The plot displays a density contour visualization with filled
+    blue contours showing concentration levels of points. Two distinct clusters are
+    visible: a larger, denser cluster centered around (4-5, 4.5-5.5) and a smaller,
+    secondary cluster centered around (9-10, 8-9). The plot uses a "Blues" colormap
+    ranging from light blue (low density ~0.000) to darker blue (high density ~0.048).
+    Yellow/gold scatter points with dark blue edges are overlaid on the contours for
+    context. The title reads "contour-density · matplotlib · pyplots.ai" at the top,
+    with "X Variable" on the x-axis and "Y Variable" on the y-axis. A colorbar labeled
+    "Density" appears on the right side. The grid is subtle with dashed lines at alpha
+    0.3.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Contours are clearly visible; scatter points are slightly small (s=30)
+          for 450 points but appropriate for density overlay context
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues colormap is colorblind-safe, yellow points provide good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight asymmetry with colorbar but overall
+          balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Variable", "Y Variable") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; colorbar well-placed but no legend
+          for scatter points
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density contour plot with KDE
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly mapped to continuous variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has contour lines, filled contours, scatter overlay as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly shows density values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-density · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two clusters demonstrating density pattern detection; could
+          show more varied cluster shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible bivariate data scenario, though generic (not a specific
+          real-world context)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible and demonstrate the visualization well
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib's contourf/contour/scatter correctly, but scipy.stats.gaussian_kde
+          does the heavy lifting; could use matplotlib-specific features like clabel
+          for contour labels
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/plotly.yaml b/plots/contour-density/metadata/plotly.yaml
index 6899324edd..ab7b90fb22 100644
--- a/plots/contour-density/metadata/plotly.yaml
+++ b/plots/contour-density/metadata/plotly.yaml
@@ -25,3 +25,173 @@ review:
   - Axis labels are generic (X Variable, Y Variable) rather than tied to a realistic
     context
   - Grid lines are too subtle (alpha 0.1) - consider alpha 0.2-0.3 for better visibility
+  image_description: 'The plot displays a density contour visualization with three
+    distinct clusters. The colorscale transitions from transparent/cream at low density
+    through yellow (#FFD43B) at medium density to dark blue (#306998/#1a3d5c) at high
+    density. Three cluster centers are visible: one at approximately (2, 3), another
+    at (5, 5), and a third at (7, 2.5). Semi-transparent scatter points are overlaid
+    showing the underlying data distribution. The title "contour-density · plotly
+    · pyplots.ai" is centered at the top. Axis labels "X Variable" and "Y Variable"
+    are clearly displayed. A colorbar on the right shows the density scale from 0
+    to ~45. The background is white with subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour fills are well-defined, scatter overlay uses appropriate
+          alpha (0.3) for 500 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow-to-blue colorscale is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but generic (no units or specific context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Colorbar well-placed, but grid alpha at 0.1 is too subtle/barely
+          visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density contour plot using Histogram2dContour
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to continuous variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has contour lines, filled contours, scatter overlay as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled as "Density"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "contour-density · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple clusters with varying densities, demonstrates core
+          use case of finding density patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Plausible bivariate data but generic (not tied to real-world scenario
+          like scientific measurements)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible range (0-8), appropriate for demonstration
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Histogram2dContour which is Plotly-specific, but does not leverage
+          Plotly's interactive features in the static output (hover tooltips would
+          enhance HTML version)
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/plotnine.yaml b/plots/contour-density/metadata/plotnine.yaml
index 8d967522ca..f2312e613b 100644
--- a/plots/contour-density/metadata/plotnine.yaml
+++ b/plots/contour-density/metadata/plotnine.yaml
@@ -23,3 +23,173 @@ review:
   - Could utilize filled contours for stronger visual impact as mentioned in spec
   - Contour line thickness could be slightly thicker for better visibility at smaller
     viewing sizes
+  image_description: 'The plot displays a density contour visualization with three
+    distinct clusters visible against a minimal gray background. Blue/gray semi-transparent
+    scatter points (alpha ~0.2) show the raw data distribution, while golden-yellow
+    contour lines (color #FFD43B) outline regions of equal density. The main cluster
+    is centered around (50, 45), a secondary cluster at (75, 70), and a smaller cluster
+    at (30, 75). The title "contour-density · plotnine · pyplots.ai" appears at the
+    top in black text. Axis labels read "Measurement A" (x-axis) and "Measurement
+    B" (y-axis). The plot uses a clean minimal theme with subtle gray gridlines. The
+    16:9 aspect ratio is properly utilized with good canvas coverage.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          proper sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter points visible with good alpha, contours clearly visible;
+          points could be slightly larger for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density contour plot type using geom_density_2d
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y continuous variables correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Contour lines showing density, scatter overlay for context (as mentioned
+          in spec notes)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, no legend required
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: contour-density · plotnine · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple clusters at different densities demonstrating the
+          contour concept well; could show more density levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Generic measurement data is neutral and plausible for scientific/quality
+          control scenarios
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable; generic measurement units work
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) properly set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses geom_density_2d which is the standard approach; could leverage
+          additional ggplot grammar features like filled contours with stat_density_2d
+          or color-mapped density
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/pygal.yaml b/plots/contour-density/metadata/pygal.yaml
index c0dfed0666..76505d58f0 100644
--- a/plots/contour-density/metadata/pygal.yaml
+++ b/plots/contour-density/metadata/pygal.yaml
@@ -27,3 +27,181 @@ review:
   - Scatter overlay points lack a legend entry explaining what they represent
   - Helper function used for color interpolation deviates slightly from pure KISS
     structure
+  image_description: 'The plot displays a density contour visualization on a white
+    background with the title "contour-density · pygal · pyplots.ai" at the top. The
+    X-axis shows "Temperature (°C)" ranging from 6 to 39, and the Y-axis shows "Humidity
+    (%)" ranging from 13 to 111. The visualization uses a sequential blue color gradient
+    (light blue to dark blue) to represent density values. Contour lines are overlaid
+    on filled density regions, showing three distinct clusters: a high-density cluster
+    around (15°C, 75% humidity), another high-density cluster around (28°C, 45% humidity),
+    and a medium-density area around (22°C, 60% humidity). A vertical colorbar on
+    the right displays density values from 0.0000 to 0.0021. Semi-transparent yellow/gold
+    scatter points are sparsely overlaid to provide context for the underlying data
+    distribution.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Density gradient and contour lines are clearly visible; scatter overlay
+          points could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins; colorbar well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Humidity (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid lines shown; colorbar serves as legend but no traditional
+          legend for scatter overlay
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements density contour plot with KDE-based contours
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Temperature) and Y (Humidity) correctly assigned as continuous
+          variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: contour lines, density gradient, scatter
+          overlay for context'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: Colorbar present but scatter overlay points lack legend explanation
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "contour-density · pygal · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple clusters with varying densities, demonstrating the
+          full capability of density contour visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Weather sensor readings (temperature vs humidity) are a neutral, scientifically
+          plausible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range (6-39°C) and humidity range (13-111%) are realistic
+          for weather data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code uses helper function (interpolate_color) which slightly deviates
+          from pure KISS, but necessary for the complex visualization
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creatively uses pygal's XY chart as a base and injects custom SVG
+          for contour rendering. This is an innovative workaround since pygal does
+          not natively support contour plots, but the heavy reliance on raw SVG manipulation
+          means limited use of pygal's actual features.
+  verdict: APPROVED
diff --git a/plots/contour-density/metadata/seaborn.yaml b/plots/contour-density/metadata/seaborn.yaml
index 21bdb627d6..ba606e0c35 100644
--- a/plots/contour-density/metadata/seaborn.yaml
+++ b/plots/contour-density/metadata/seaborn.yaml
@@ -22,3 +22,174 @@ review:
   weaknesses:
   - No colorbar/legend to show the density scale mapping
   - Generic axis labels (X Variable, Y Variable) rather than a real-world scenario
+  image_description: 'The plot displays a 2D density contour visualization with filled
+    contours using the viridis colormap (purple for low density transitioning through
+    blue/green to yellow for high density). Two distinct cluster centers are clearly
+    visible: a larger main cluster centered around (5, 4-5) and a smaller secondary
+    cluster around (9-10, 7-8). White semi-transparent scatter points are overlaid
+    on the contours to show the underlying data distribution. The title "contour-density
+    · seaborn · pyplots.ai" appears at the top. Both axes have descriptive labels
+    with units: "X Variable (units)" and "Y Variable (units)". A subtle dashed grid
+    is present in the background. The plot fills the canvas well with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes (24pt title, 20pt labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour levels are well-defined, scatter overlay uses appropriate
+          size (s=15) and alpha (0.3) for 450 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "X Variable (units)", "Y Variable
+          (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but no legend/colorbar to indicate
+          density scale
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density contour plot using KDE
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y variables correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has filled contours, contour lines, and scatter overlay as mentioned
+          in spec notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type (density is shown via color gradient)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-density · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows two clusters of different sizes/densities, demonstrating the
+          power of density visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Generic "X Variable" and "Y Variable" labels are plausible but not
+          a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in reasonable ranges (0-14 for X, 0-12 for Y)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: matplotlib, numpy, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of seaborn's kdeplot with both filled contours and
+          line contours, demonstrating library's statistical visualization strengths
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/altair.yaml b/plots/contour-filled/metadata/altair.yaml
index 355ac80232..0ec9844661 100644
--- a/plots/contour-filled/metadata/altair.yaml
+++ b/plots/contour-filled/metadata/altair.yaml
@@ -23,3 +23,175 @@ review:
     clear level identification as specified
   - The white point markers for contour lines create visual noise rather than clean
     isolines
+  image_description: 'The plot displays a filled contour visualization of a 2D scalar
+    field using the viridis colormap. The image shows two prominent Gaussian peaks:
+    one bright yellow peak at approximately (1, 1) and another slightly dimmer yellow-green
+    peak at (-1, -0.5). There is also a subtle valley/depression near (0, 1.5) visible
+    as a darker purple region. The X and Y axes are labeled "X Coordinate" and "Y
+    Coordinate" respectively, ranging from -3 to 3. The title "contour-filled · altair
+    · pyplots.ai" appears at the top. A colorbar on the right labeled "Intensity"
+    shows the color mapping from approximately -0.2 (dark purple) to 1.2+ (bright
+    yellow). The plot has good visual balance with the chart filling most of the canvas.
+    White points are scattered throughout indicating contour level boundaries, though
+    they appear as noise rather than clean isolines.'
+  criteria_checklist:
+    visual_quality:
+      score: 34
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 22pt, tick labels 18pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Color bands clearly visible, though white contour points create some
+          visual noise
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, though aspect ratio is slightly non-square for
+          the data domain
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (generic X/Y coordinates)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar well-placed and sized, but subtle grid lines visible in
+          background
+    spec_compliance:
+      score: 22
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour representation using colored regions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly mapped, Z values to color
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Has filled regions and colorbar; contour line overlay attempted but
+          not as clean isolines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled as "Intensity"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: contour-filled · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple peaks, valley, and subtle ripples demonstrating surface
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function visualization, plausible scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reasonable range for mathematical function (-3 to 3)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes, follows imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Code uses current API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_rect for heatmap-style filled contours, proper encoding
+          with scales, but doesn't leverage Altair's declarative strengths fully for
+          this plot type
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/bokeh.yaml b/plots/contour-filled/metadata/bokeh.yaml
index 66b671f04d..6b2b5d2803 100644
--- a/plots/contour-filled/metadata/bokeh.yaml
+++ b/plots/contour-filled/metadata/bokeh.yaml
@@ -24,3 +24,177 @@ review:
   - Grid styling still somewhat prominent (alpha 0.3 with width 2); could be reduced
     further for cleaner contour visualization
   - Colorbar title is slightly small relative to other text elements
+  image_description: The plot shows a filled contour visualization of terrain elevation
+    data using the Viridis colormap. The image displays a 2D surface with multiple
+    Gaussian peaks representing elevation values from 0 to 2000 meters. There are
+    three prominent bright yellow-green peaks (highest elevations around 1800-2000m)
+    and one darker purple depression (lowest elevations). White contour lines with
+    thin dark outlines are overlaid on the filled surface for precise level identification.
+    The title "contour-filled · bokeh · pyplots.ai" appears in the top-left corner.
+    The X-axis is labeled "Distance East (km)" and the Y-axis is labeled "Distance
+    North (km)". A vertical colorbar on the right side shows "Terrain Elevation (m)"
+    with tick marks from 0 to 2000. A subtle white dashed grid overlays the plot.
+    The overall layout is well-balanced with the plot filling most of the canvas.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Filled surface and contour lines are well-visible; contour lines
+          use effective white+dark styling
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight margin imbalance with colorbar
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("Distance East (km)", "Distance North
+          (km)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha at 0.3 with width 2 is still somewhat prominent; overlays
+          the contour visualization more than ideal
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly mapped, Z values shown as color fill
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential colormap, colorbar, overlaid contour lines, smooth color
+          transitions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly shows elevation mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "contour-filled · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple peaks and one valley, demonstrating contour behavior
+          well; could show more dramatic gradient variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Terrain elevation is a perfect, neutral real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-2000m elevation is realistic; 80x80 grid provides smooth transitions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly, but axis labels set via figure() params
+          would be cleaner
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses bokeh's image, HoverTool for interactivity, and contourpy integration;
+          however, hover functionality only works in HTML output, not visible in PNG.
+          Interactive tools (pan, zoom) are appropriate for Bokeh's strengths.
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/highcharts.yaml b/plots/contour-filled/metadata/highcharts.yaml
index 7e325c6342..2a6afda958 100644
--- a/plots/contour-filled/metadata/highcharts.yaml
+++ b/plots/contour-filled/metadata/highcharts.yaml
@@ -26,3 +26,179 @@ review:
   - Code structure includes helper functions (marching_squares_contour, connect_segments)
     which violates the KISS principle - should be flat script structure
   - Colorbar/legend takes significant right margin space; could be more compact
+  image_description: The plot displays a filled contour visualization showing a 2D
+    scalar field with multiple Gaussian peaks. The colormap uses viridis (colorblind-safe),
+    ranging from dark purple (~0%) through teal/green (~50%) to bright yellow (~100%).
+    The plot has a clear title "contour-filled · highcharts · pyplots.ai" at the top.
+    X and Y axes are labeled "X Position (units)" and "Y Position (units)" respectively,
+    ranging from -4.0 to 4.0. White contour lines with black shadows are overlaid
+    on the heatmap for precise level identification. Contour level labels (10%, 30%,
+    50%, 70%, 90%) are shown with white backgrounds. A vertical colorbar on the right
+    shows "Intensity (%)" from 0% to 100%. The plot shows three main high-intensity
+    peaks (yellow regions) and one low-intensity valley (dark purple region), demonstrating
+    the multi-Gaussian surface effectively.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and colorbar text are all clearly readable. Slight
+          deduction as tick labels could be larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, contour labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are well-sized for the 80x80 grid, contour lines visible
+          with shadow technique
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight deduction for colorbar taking significant
+          right margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "X Position (units)",
+          "Y Position (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid lines (acceptable for heatmap), but colorbar title "Intensity
+          (%)" is functional
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot using heatmap with overlaid contour lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid correctly mapped, Z values as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, contour lines overlaid, appropriate number of levels
+          (15)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full -4 to 4 range displayed on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows 0-100% range
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "contour-filled · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows multiple peaks and one valley, demonstrates gradient transitions
+          well. Minor deduction: could show more diverse contour shapes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Gaussian peaks represent plausible atmospheric/temperature distribution
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized 0-100% scale is sensible, grid coordinates -4 to 4 appropriate
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Code includes functions (marching_squares_contour, connect_segments)
+          which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts heatmap with color axis, annotations
+          for contour labels, custom line series overlay, CDP screenshot for high-resolution
+          output
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/letsplot.yaml b/plots/contour-filled/metadata/letsplot.yaml
index 3549ddf6d3..d0b8890710 100644
--- a/plots/contour-filled/metadata/letsplot.yaml
+++ b/plots/contour-filled/metadata/letsplot.yaml
@@ -26,3 +26,177 @@ review:
     real-world context with units
   - Data context is mathematical rather than a real-world application scenario (e.g.,
     topographic elevation, temperature distribution)
+  image_description: 'The plot displays a filled contour visualization of a 2D scalar
+    field with the plasma colormap (viridis family). Two prominent peaks appear as bright
+    yellow/orange concentric regions: a main peak at approximately (1.5, 1.5) and
+    a secondary peak at (-1.5, -1.5). A subtle depression is visible at (1.5, -1.5)
+    appearing as a slightly darker blue-purple region. A diagonal ridge connects features
+    across the plot. The colors range from deep purple/blue (values near 0) through
+    pink/magenta to bright yellow (values near 1). White contour lines are overlaid
+    at 15 levels to help identify precise boundaries. The title "contour-filled ·
+    letsplot · pyplots.ai" appears at the top, axis labels show "X Coordinate" and
+    "Y Coordinate", and a vertical colorbar labeled "Value" displays the mapping from
+    0 to 1.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, legend text
+          at 14-16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Filled contours with 15 bins provide excellent gradient visibility,
+          overlaid white contour lines are subtle but visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Plasma colormap is colorblind-friendly, excellent perceptual uniformity
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels ("X Coordinate", "Y Coordinate") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: No grid visible (acceptable for contour plot), legend well-placed
+          but could have more descriptive title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot with color bands between level curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly mapped to grid, z values to fill colors
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, contour lines overlaid as suggested in spec, smooth
+          color transitions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible from -4 to 4 on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows value mapping 0 to 1
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "contour-filled · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows peaks, valleys, and ridges demonstrating all aspects of filled
+          contour visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Mathematical function (Gaussian peaks/valleys) is plausible but generic,
+          not a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 0-1 are sensible for normalized function output, grid spans
+          -4 to 4
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though data is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar, geom_contourf/geom_contour, scale_fill_viridis
+          with plasma option, theme_minimal - good lets-plot usage but no advanced
+          features like tooltips or interactivity setup
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/matplotlib.yaml b/plots/contour-filled/metadata/matplotlib.yaml
index 99c023d9cc..6245cd2b41 100644
--- a/plots/contour-filled/metadata/matplotlib.yaml
+++ b/plots/contour-filled/metadata/matplotlib.yaml
@@ -26,3 +26,178 @@ review:
   - Mathematical demonstration is generic rather than a compelling real-world scenario
     (e.g., topographic elevation, temperature field)
   - Could add contour labels (clabel) to show numerical values on contour lines
+  image_description: 'The plot displays a filled contour visualization of a 2D mathematical
+    surface. The plot uses the viridis colormap, ranging from dark purple (approximately
+    -0.6) through teal/blue, green, to bright yellow (approximately 1.5). The surface
+    shows multiple features: two prominent yellow peaks (one around coordinates (1,
+    1) and another around (-1.5, -1)), a dark purple valley/depression around (-0.5,
+    1.5), and a smaller peak around (1.5, -1.5). White contour lines are overlaid
+    on the filled regions for level identification. The X and Y axes range from -3
+    to 3, labeled as "X Position" and "Y Position" respectively. A colorbar on the
+    right shows "Surface Height (z)" values. The title correctly displays "contour-filled
+    · matplotlib · pyplots.ai". The plot uses equal aspect ratio, making the overall
+    shape square within a 16:9 canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, colorbar
+          labels at appropriate sizes - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour levels clearly visible, 80x80 grid provides smooth color
+          transitions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is a perceptually uniform, colorblind-safe colormap
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, equal aspect ratio makes plot square leaving
+          some empty space on sides but overall balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but without units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit grid (appropriate for contour), colorbar well-placed;
+          contour lines act as implicit grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot using contourf
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly assigned to meshgrid
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: filled contours, colorbar, overlaid contour
+          lines, appropriate level count (15)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible, colorbar shows complete value range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar label correctly describes the z-values as "Surface Height
+          (z)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-filled · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent feature coverage: multiple peaks, a valley, and smooth
+          gradients demonstrating all aspects of filled contours'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Mathematical surface (Gaussian peaks/valleys) is plausible for demonstrating
+          contours but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in reasonable range (-0.8 to 1.5), coordinate range (-3 to
+          3) appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed as data is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses contourf and contour appropriately, but could leverage additional
+          matplotlib features like clabel for contour labels
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/plotly.yaml b/plots/contour-filled/metadata/plotly.yaml
index 74871246cf..b9f7052999 100644
--- a/plots/contour-filled/metadata/plotly.yaml
+++ b/plots/contour-filled/metadata/plotly.yaml
@@ -24,3 +24,180 @@ review:
     context
   - Could leverage more Plotly-specific features like custom hovertemplate to show
     (x, y, z) values on interaction
+  image_description: 'The plot displays a filled contour visualization of a 2D scalar
+    field with multiple Gaussian features. The color scheme uses the Viridis colormap,
+    transitioning from dark blue/purple (low values around -0.8) through teal and
+    green to bright yellow (high values around 1.8). Two prominent peaks appear: a
+    bright yellow peak in the upper right quadrant around (1, 1) and a green-yellow
+    peak in the lower left quadrant around (-1, -1). There is also a smaller teal
+    peak in the upper left and a purple valley (negative values) in the lower right
+    quadrant around (1, -1). White contour lines with numeric labels (0.2, 0.4, 0.6,
+    etc.) overlay the filled regions. The title reads "contour-filled · plotly · pyplots.ai"
+    at the top, with "X Coordinate" and "Y Coordinate" axis labels. A vertical colorbar
+    titled "Surface Value" appears on the right side. The aspect ratio is maintained
+    as square within the plotting area.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and colorbar text are all clearly
+          readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; contour labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour lines and filled regions are clearly visible with good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe and provides excellent perceptual
+          uniformity
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins; colorbar well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Coordinate", "Y Coordinate") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, but colorbar could have more tick marks for finer
+          value reading
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot with color bands between level curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y grid coordinates correctly mapped with Z values shown as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, contour lines overlaid for precise level identification,
+          15 levels used
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range (-3 to 3)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows value mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-filled · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent data showing multiple peaks, a valley (negative values),
+          and smooth transitions - demonstrates full capability of filled contours
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Mathematical Gaussian surface is a standard, neutral example for
+          demonstrating contour plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values range appropriately from about -1 to 2, realistic for a mathematical
+          function
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy and plotly.graph_objects'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: While the implementation is correct, it does not leverage Plotly's
+          distinctive interactive features in a meaningful way for the static output.
+          The HTML output enables interactivity, but no special hover templates, animations,
+          or advanced Plotly-specific features are showcased.
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/plotnine.yaml b/plots/contour-filled/metadata/plotnine.yaml
index 7ba2a000d9..29ee52d826 100644
--- a/plots/contour-filled/metadata/plotnine.yaml
+++ b/plots/contour-filled/metadata/plotnine.yaml
@@ -25,3 +25,178 @@ review:
   - Uses matplotlib contour function directly instead of pure plotnine (acceptable
     workaround but reduces library-native score)
   - Axis labels lack units (though Coordinate is descriptive for mathematical domain)
+  image_description: 'The plot displays a filled contour visualization of a 2D scalar
+    field using the viridis colormap. The background is a blue-green gradient representing
+    lower values (around -0.25 to 0.25), with several peaks visible as bright yellow
+    regions. There are three main peaks: a bright yellow peak in the upper right quadrant
+    (around coordinates 1, 1), a yellow-green peak in the lower left quadrant (around
+    -1.2, -1), and a smaller peak in the lower right (around 1.5, -1.5). A purple
+    valley/dip is visible in the upper left area (around -0.5, 1.5) representing negative
+    values. White contour lines with labels are overlaid showing precise level values
+    (0.1, 0.2, 0.3, etc.). The colorbar on the right shows "Value" ranging from -0.25
+    to 1.00. Title reads "contour-filled · plotnine · pyplots.ai", with axis labels
+    "X Coordinate" and "Y Coordinate".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and contour labels are all clearly
+          readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; contour labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Smooth color transitions with well-defined regions; contour lines
+          visible but not overwhelming
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe and provides excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but some white space could be better utilized; colorbar
+          is well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Coordinate" and "Y Coordinate" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (appropriate for contour plots), colorbar well-placed with
+          clear title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour visualization with geom_tile and overlaid
+          contour lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly mapped to coordinates and fill color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes colorbar, contour lines overlay as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range of the mathematical function
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately labeled "Value" with correct range
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-filled · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple peaks (positive), a valley (negative), and gradual
+          transitions between them
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Mathematical function is appropriate for demonstrating contour plots
+          but generic rather than a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values from -0.25 to 1.0 are sensible for mathematical peaks/valleys
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set (though not strictly needed for deterministic
+          function)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but uses fig.savefig() directly instead of plot.save()
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's ggplot grammar with geom_tile and theme system; however,
+          the contour overlay uses matplotlib directly via fig.axes[0], which is a
+          hybrid approach
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/pygal.yaml b/plots/contour-filled/metadata/pygal.yaml
index 9f6c8d9abd..6ae533fd22 100644
--- a/plots/contour-filled/metadata/pygal.yaml
+++ b/plots/contour-filled/metadata/pygal.yaml
@@ -27,3 +27,177 @@ review:
   - Code uses helper functions (interpolate_color, lerp) which deviates from KISS
     principle
   - Axis labels lack units - could use generic units like Position (a.u.) or similar
+  image_description: 'The plot displays a filled contour visualization on a 4800×2700
+    canvas. The title "contour-filled · pygal · pyplots.ai" appears at the top in
+    dark gray text. The main plot area shows a 2D scalar field with multiple Gaussian
+    features: a dominant red/dark red peak in the upper right quadrant (around x=1, y=1)
+    representing positive values up to ~1.50, and a dark blue valley in the lower
+    left quadrant (around x=-1, y=-1) representing negative values down to ~-0.98.
+    Two additional smaller features are visible: one reddish peak in the lower right
+    and one blue-ish region. The colormap transitions smoothly from dark blue (negative)
+    through white (near zero) to dark red (positive). Subtle contour lines overlay
+    the filled regions at approximately 0.4 opacity. Axis labels read "X Coordinate"
+    and "Y Coordinate" with tick marks from -3.0 to 3.0. A colorbar on the right side
+    labeled "Intensity" shows the value mapping from -0.98 to 1.50.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels all clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Filled regions and contour lines visible, contour lines could be
+          slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging blue-white-red colormap is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, colorbar positioned
+          appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Coordinate" and "Y Coordinate" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid shown; colorbar present but contour lines are quite subtle
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid with Z values correctly mapped to colors
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has filled regions, colorbar, and contour line overlay; contour lines
+          could be more visible
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar shows "Intensity" which is generic rather than describing
+          the mathematical function
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-filled · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple peaks (positive and negative), smooth gradients, demonstrates
+          both high and low regions
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical Gaussian peaks scenario is valid; slightly generic but
+          appropriate for demonstrating the technique
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values from -0.98 to 1.50 are realistic for Gaussian surface functions
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Has helper function `interpolate_color` and nested function `lerp`;
+          not pure KISS style
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` (though data is deterministic anyway)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of pygal's SVG rendering with custom SVG injection for
+          contours; generates both PNG and interactive HTML
+  verdict: APPROVED
diff --git a/plots/contour-filled/metadata/seaborn.yaml b/plots/contour-filled/metadata/seaborn.yaml
index 330594f47e..a88fe3d4ad 100644
--- a/plots/contour-filled/metadata/seaborn.yaml
+++ b/plots/contour-filled/metadata/seaborn.yaml
@@ -28,3 +28,185 @@ review:
     - relies primarily on matplotlib for the actual contour plotting
   - Could benefit from a more application-specific scenario (e.g., topographic elevation,
     temperature field) rather than abstract mathematical function
+  image_description: 'The plot shows a filled contour visualization of a 2D scalar
+    field with multiple Gaussian peaks and a saddle region. The image uses the viridis
+    colormap ranging from dark purple (-0.346) through blue, green to yellow (0.997).
+    There are two prominent positive peaks: one in the upper-right quadrant (around
+    x=1, y=1) and another in the lower-left quadrant (around x=-1, y=-1), both reaching
+    yellow intensity values close to 1.0. A negative "valley" region is visible around
+    x=0.5, y=-0.5 appearing in dark purple. White contour lines are overlaid at low
+    opacity to show level boundaries. The colorbar is positioned on the right with
+    the label "Intensity". The title follows the correct format "contour-filled ·
+    seaborn · pyplots.ai". Axis labels show "X Coordinate" and "Y Coordinate". The
+    plot uses equal aspect ratio, making the contours appear geometrically accurate.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Contour regions are clearly visible with smooth color transitions,
+          80x80 grid provides excellent resolution
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with plot filling most of canvas, slight asymmetry due
+          to colorbar but well balanced overall
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Coordinate", "Y Coordinate") but no units
+          specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid disabled (appropriate for contour plots), colorbar well-placed;
+          subtle white contour lines at alpha=0.4 work well
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled contour plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly form meshgrid, Z values correctly mapped
+          to colors
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar, contour lines overlay, appropriate number of levels
+          (15), smooth color transitions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full -3 to 3 range, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows value mapping with "Intensity" label
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "contour-filled · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows multiple Gaussian peaks and a saddle/negative region, demonstrating
+          both positive peaks and negative valleys. Could show more varied peak shapes
+          or asymmetric features
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function surface is appropriate for demonstrating contours,
+          though axis labels are generic rather than representing a specific real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values range appropriately from -0.346 to 0.997, grid from -3 to
+          3 is sensible for Gaussian functions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions or
+          classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) (though data is deterministic, seed is still
+          set)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, and seaborn imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses sns.set_theme and sns.set_context for styling, and sns.color_palette
+          for colormap. However, seaborn does not have native contour functions, so
+          this implementation relies on matplotlib's contourf. The seaborn usage is
+          limited to styling rather than core plotting functionality.
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/altair.yaml b/plots/count-basic/metadata/altair.yaml
index d3d57ba57f..8d994b6a8d 100644
--- a/plots/count-basic/metadata/altair.yaml
+++ b/plots/count-basic/metadata/altair.yaml
@@ -24,3 +24,172 @@ review:
     - the implementation creates a bar chart from pre-computed values rather than
     demonstrating Altair automatic counting capability
   - Y-axis tick marks are too granular (every 2 units) creating visual clutter
+  image_description: 'The plot displays a vertical bar chart showing survey response
+    frequencies. Five blue bars (#306998) with rounded top corners are arranged in
+    descending order by count: Good (66), Excellent (56), Average (36), Poor (27),
+    and Very Poor (15). Count labels appear above each bar in bold black text. The
+    title "count-basic · altair · pyplots.ai" is centered at the top. The X-axis is
+    labeled "Survey Response" and Y-axis "Number of Responses" with a subtle dashed
+    grid (alpha 0.3). The layout is well-balanced with good use of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and count annotations are all clearly readable
+          at appropriate font sizes (18-28pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Survey Response" and "Number of Responses"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present but Y-axis has excessive tick marks (every 2 units
+          from 0-70), making it cluttered
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct count plot (vertical bars showing category frequencies)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, counts on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Bars sorted by frequency descending, count labels on bars as suggested
+          in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "count-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across 5 categories demonstrating count
+          plot capabilities
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Survey responses with a realistic distribution (skewed positive)
+          are plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 200 total responses with realistic proportions (25%, 35%, 20%, 12%,
+          8%)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Pre-aggregates data instead of using Altair's declarative `count()`
+          aggregation. A true Altair count plot would use `alt.Chart(df).mark_bar().encode(x='Response:N',
+          y='count()')` to let Altair handle the counting automatically.
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/bokeh.yaml b/plots/count-basic/metadata/bokeh.yaml
index 6a338f7313..040a748251 100644
--- a/plots/count-basic/metadata/bokeh.yaml
+++ b/plots/count-basic/metadata/bokeh.yaml
@@ -23,3 +23,172 @@ review:
   weaknesses:
   - Grid styling could be slightly more subtle (alpha 0.3 is at the upper end of recommended
     0.2-0.4 range)
+  image_description: 'The plot displays a vertical bar chart showing customer satisfaction
+    survey responses. Five blue bars (#306998) are arranged in descending order by
+    count: Satisfied (66), Very Satisfied (56), Neutral (36), Dissatisfied (27), and
+    Very Dissatisfied (15). Each bar has a count label positioned above it in matching
+    blue text. The title "count-basic · bokeh · pyplots.ai" is centered at the top.
+    The x-axis is labeled "Response Category" with slightly rotated category names,
+    and the y-axis is labeled "Number of Responses" with values from 0 to ~70. The
+    background is light gray (#fafafa) with subtle dashed horizontal grid lines. The
+    bars have a darker blue border and slight transparency (alpha=0.85).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels are rotated to prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths and heights are well-proportioned for the data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization, minor: slightly more whitespace on right
+          side'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Response Category" and "Number of Responses"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed, but y-grid could be slightly more subtle (currently
+          adequate)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct count plot with vertical bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, counts on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic counting from raw data, sorted by frequency, count labels
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0 to max+15% for label space
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series count plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "count-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied distribution across 5 categories with different frequencies;
+          could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is a perfect, realistic use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 200 total responses with realistic probability distribution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → counting → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: ColumnDataSource, LabelSet for annotations, proper categorical axis
+          handling, both PNG and HTML export
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/highcharts.yaml b/plots/count-basic/metadata/highcharts.yaml
index 76f11e81b9..766a300360 100644
--- a/plots/count-basic/metadata/highcharts.yaml
+++ b/plots/count-basic/metadata/highcharts.yaml
@@ -25,3 +25,174 @@ review:
     of Responses" instead of just "Count")
   - Could leverage more Highcharts-specific features in the HTML output (tooltips
     are standard but animations could be showcased)
+  image_description: 'The plot displays a count/column chart showing survey response
+    frequencies with 5 categories sorted in descending order by count. The bars are
+    rendered in Python Blue (#306998) color. From left to right: "Satisfied" (24 responses,
+    tallest bar), "Very Satisfied" (12), "Neutral" (8), "Dissatisfied" (5), and "Very
+    Dissatisfied" (1, shortest bar). Each bar has a white data label showing the count
+    value positioned at the top. The title "count-basic · highcharts · pyplots.ai"
+    appears at the top in bold. The Y-axis is labeled "Count" ranging from 0-26, and
+    the X-axis is labeled "Response Category". The background is white with subtle
+    gray horizontal grid lines. The chart has good proportions with balanced margins
+    and fills the canvas well.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and data labels all clearly readable
+          at the large resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels have adequate spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good width and spacing for 5 categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout overall, though slightly more bottom margin than needed
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Count", "Response Category") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend correctly disabled for single
+          series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct column/count chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, counts on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has sorted bars (descending), count labels on bars as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to above max count (26)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "count-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in counts across categories (1-24 range); all
+          categories have different counts, which is good
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey satisfaction responses are a perfect real-world scenario for
+          count plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 50 total responses is reasonable; values are plausible for a small
+          survey
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → count → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded list), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts column series with data labels, proper chart configuration,
+          and correct PNG export via Selenium. Could leverage more Highcharts-specific
+          features like tooltips or animations in HTML output.
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/letsplot.yaml b/plots/count-basic/metadata/letsplot.yaml
index d129648036..6dece192eb 100644
--- a/plots/count-basic/metadata/letsplot.yaml
+++ b/plots/count-basic/metadata/letsplot.yaml
@@ -23,3 +23,161 @@ review:
     use case
   weaknesses:
   - Horizontal grid lines could use lower alpha for more subtle appearance
+  image_description: 'The plot displays a vertical bar chart showing customer satisfaction
+    ratings. Five blue bars (#306998) represent categories from left to right: "Excellent"
+    (45), "Good" (78), "Average" (52), "Poor" (23), and "Very Poor" (12). Each bar
+    has its count value displayed above it in dark gray text. The X-axis is labeled
+    "Customer Satisfaction Rating" and the Y-axis is labeled "Number of Responses"
+    with values ranging from 0 to 90. The title "count-basic · letsplot · pyplots.ai"
+    appears at the top left in bold. The plot uses a minimal theme with only horizontal
+    grid lines visible, creating a clean appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title bold at 24pt, axis titles at 20pt, tick text at 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with width=0.7 and alpha=0.85
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units (counts are unitless, acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Vertical grid removed (good), horizontal grid could be slightly more
+          subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct count/bar plot from raw categorical data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, counts on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Count labels on bars, logical category ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-90, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "count-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying frequencies (12-78) across 5 categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response counts (12, 23, 45, 52, 78) are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/matplotlib.yaml b/plots/count-basic/metadata/matplotlib.yaml
index 88384eee54..04c2523d7b 100644
--- a/plots/count-basic/metadata/matplotlib.yaml
+++ b/plots/count-basic/metadata/matplotlib.yaml
@@ -22,3 +22,167 @@ review:
   - Could use more distinctive matplotlib features (hatching, gradient fills, or percentage
     annotations)
   - Minor layout inefficiency with extra whitespace on right side
+  image_description: 'The plot displays a vertical bar chart showing survey response
+    frequencies. Five blue bars (#306998) with darker edges are arranged in descending
+    order by count: "Agree" (69), "Neutral" (47), "Disagree" (39), "Strongly Agree"
+    (33), and "Strongly Disagree" (12). Bold blue count labels appear above each bar.
+    The title "count-basic · matplotlib · pyplots.ai" is centered at the top. The
+    x-axis is labeled "Survey Response" and the y-axis "Count". A subtle dashed grid
+    appears on the y-axis only. The top and right spines are removed for a cleaner
+    appearance. All text is clearly legible.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good spacing (width=0.7)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (counts don't require units, acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3), y-axis only is appropriate; no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct count plot (bar chart from counted data)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, counts on Y correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by frequency, count labels on bars as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis extends to accommodate labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series count plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "count-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying frequencies; could show more extreme
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey responses (Likert scale) is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 200 responses with realistic distribution weights; counts are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ax.bar and ax.annotate correctly but no advanced matplotlib
+          features like custom patches, hatching, or secondary annotations
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/plotly.yaml b/plots/count-basic/metadata/plotly.yaml
index 55caa464ef..f03512f233 100644
--- a/plots/count-basic/metadata/plotly.yaml
+++ b/plots/count-basic/metadata/plotly.yaml
@@ -22,3 +22,171 @@ review:
   - Could enhance Plotly interactivity with custom hover templates showing percentages
   - Y-axis label could include units (e.g., Count (n) or Frequency)
   - Bar colors could use a gradient or vary by value to better showcase Plotly capabilities
+  image_description: 'The plot displays a vertical bar chart showing survey response
+    counts. Five blue bars (#306998 with darker outlines) are arranged from highest
+    to lowest frequency: Agree (66), Strongly Agree (56), Neutral (36), Disagree (27),
+    and Strongly Disagree (15). Each bar has its count value displayed above it in
+    large text. The title "count-basic · plotly · pyplots.ai" is centered at the top.
+    The x-axis is labeled "Survey Response" and y-axis is labeled "Count". The background
+    uses the plotly_white template with subtle horizontal gridlines. The layout is
+    clean and well-proportioned.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and count annotations are all clearly
+          readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good spacing (bargap=0.3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with good contrast; no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units (e.g., "Count (n)" would be
+          better)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.1), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct count plot using vertical bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, counts on Y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by frequency descending, count labels on bars as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to above max count
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, appropriately omitted
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "count-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across 5 categories; could show more extreme
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey responses (Likert scale) is a realistic, commonly understood
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 200 total responses is reasonable; distribution probabilities are
+          realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Bar correctly but doesn't leverage Plotly's interactive features
+          like hover templates or animations
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/plotnine.yaml b/plots/count-basic/metadata/plotnine.yaml
index ac0b250699..cd3c93ae09 100644
--- a/plots/count-basic/metadata/plotnine.yaml
+++ b/plots/count-basic/metadata/plotnine.yaml
@@ -24,3 +24,141 @@ review:
   weaknesses:
   - 'Minor: Yellow bar color (#FFD43B) on white background has slightly reduced contrast
     compared to blue bars'
+  image_description: 'The plot displays a vertical bar chart showing customer survey
+    responses. Five bars represent response categories from left to right: "Excellent"
+    (45, dark blue), "Good" (78, medium blue), "Average" (52, lighter blue), "Poor"
+    (23, pale blue), and "Very Poor" (12, yellow/gold). Each bar has its count value
+    displayed above it. The title "count-basic · plotnine · pyplots.ai" appears at
+    the top. The x-axis is labeled "Customer Response" and the y-axis "Number of Responses".
+    The background is clean white with subtle horizontal grid lines, using theme_minimal
+    styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable, appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars well-sized with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: gradient is accessible, yellow on white slightly reduced contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent canvas utilization with 16:9 aspect ratio
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive labels for both axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: subtle grid, legend appropriately hidden
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct count plot using geom_bar
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: count labels, logical ordering, adequate spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend correctly omitted
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'uses correct format: count-basic · plotnine · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows variation across all 5 categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: plausible product feedback survey scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic response counts
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic hardcoded data
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/pygal.yaml b/plots/count-basic/metadata/pygal.yaml
index 065af9e7f1..e21ea310af 100644
--- a/plots/count-basic/metadata/pygal.yaml
+++ b/plots/count-basic/metadata/pygal.yaml
@@ -23,3 +23,173 @@ review:
   - Grid lines could be more subtle (lower alpha/opacity)
   - Axis labels could include units for clarity (e.g., Number of Responses instead
     of Count)
+  image_description: The plot displays a vertical bar chart showing customer satisfaction
+    survey responses across 5 categories ordered logically from "Very Dissatisfied"
+    to "Very Satisfied". The bars are rendered in a consistent muted blue color (#306998)
+    with count values (1, 5, 8, 21, 15) displayed on top of each bar. The title "count-basic
+    · pygal · pyplots.ai" appears at the top center. The x-axis is labeled "Satisfaction
+    Level" and y-axis is labeled "Count". Horizontal dotted grid lines aid readability.
+    The chart uses a white background with good spacing between bars and balanced
+    margins.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and value labels are all clearly
+          readable at the large canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all category labels and values are
+          well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are appropriately sized with good width and spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units (Count could be "Number of
+          Responses")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (dotted lines), legend correctly hidden for single-series
+          count plot; minor deduction for grid being slightly prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct bar chart type for count plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, counts on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Automatic counting from raw data, count labels on bars, logical category
+          ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden for single-series count plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "count-basic · pygal · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across all 5 satisfaction levels with different frequencies;
+          could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction survey is a perfect real-world scenario for
+          count plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 50 responses is realistic; counts are plausible for a survey
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded list), no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only Counter, pygal, and Style are imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses pygal's Style customization, print_values, and value_formatter;
+          could leverage more pygal-specific features like tooltips or custom rendering
+  verdict: APPROVED
diff --git a/plots/count-basic/metadata/seaborn.yaml b/plots/count-basic/metadata/seaborn.yaml
index 57096ee409..c09e71ff9e 100644
--- a/plots/count-basic/metadata/seaborn.yaml
+++ b/plots/count-basic/metadata/seaborn.yaml
@@ -21,3 +21,172 @@ review:
   weaknesses:
   - Does not leverage seaborn distinctive features like hue parameter or palette customization
   - Y-axis label could be more specific (e.g., Response Count or Frequency)
+  image_description: 'The plot displays a count plot (bar chart) showing survey responses
+    for preferred programming languages. Eight vertical bars are displayed in descending
+    order by frequency: Python (140), JavaScript (101), Java (86), Go (43), C++ (42),
+    Rust (33), TypeScript (31), and Ruby (24). The bars are a consistent blue color
+    (#306998). Each bar has a count label positioned above it. The x-axis is labeled
+    "Programming Language" and the y-axis is labeled "Number of Responses". The title
+    follows the required format "count-basic · seaborn · pyplots.ai". A subtle horizontal
+    dashed grid is visible behind the bars, and the top and right spines have been
+    removed for a cleaner appearance. The layout is well-balanced with good use of
+    canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all category labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized, count labels clearly visible above each bar
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (counts don't need units, but could
+          specify "count" or "frequency")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha 0.3, dashed style; no legend needed for single-color
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct count plot using sns.countplot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to x-axis, counts to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted by frequency (descending), count labels on bars as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "count-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across 8 categories with good distribution
+          from high to low
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language survey is a relatable, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 responses distributed across 8 languages with realistic weights
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.countplot which is the appropriate seaborn function, but
+          this is basic usage without leveraging seaborn's distinctive features like
+          hue for grouping, statistical estimation, or palette customization
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/altair.yaml b/plots/dendrogram-basic/metadata/altair.yaml
index 7b00c9c1a3..40c3efe9c7 100644
--- a/plots/dendrogram-basic/metadata/altair.yaml
+++ b/plots/dendrogram-basic/metadata/altair.yaml
@@ -21,3 +21,165 @@ review:
   weaknesses:
   - X-axis labels could be slightly larger for better readability at full resolution
   - Minor empty space on right side of plot could be reduced
+  image_description: 'The plot displays a hierarchical dendrogram with 15 iris flower
+    samples (5 each of Setosa, Versicolor, and Virginica). The vertical tree structure
+    shows "Distance (Ward)" on the Y-axis (0-11 range). Sample labels appear at the
+    bottom rotated 45 degrees. A two-color scheme distinguishes cluster levels: blue
+    (#306998) for high-level merges above the threshold, yellow/gold (#FFD43B) for
+    lower-level clusters. Species group appropriately: Setosa clusters on the left,
+    Versicolor and Virginica on the right, with all merging at ~10.1 distance.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and Y-axis clear; x-axis labels slightly small when rotated
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All labels well-spaced, no overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line thickness appropriate, U-shapes clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow colorblind-safe scheme
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, minor empty space on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Distance (Ward)" descriptive but unitless'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle dashed grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct hierarchical dendrogram structure
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels on X, distances on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Tree structure, proportional branch heights, leaf labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full Y-axis range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, color coding self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"dendrogram-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Clear hierarchical structure with distinct merge levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic iris dataset scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 15 samples within recommended range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creative use of mark_rule with x/x2/y/y2 encoding and layering, but
+          workaround since Altair lacks native dendrogram support
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/bokeh.yaml b/plots/dendrogram-basic/metadata/bokeh.yaml
index a917b67ffd..f35c2b3622 100644
--- a/plots/dendrogram-basic/metadata/bokeh.yaml
+++ b/plots/dendrogram-basic/metadata/bokeh.yaml
@@ -25,3 +25,174 @@ review:
   - X-axis label Sample is generic; could be more descriptive like Iris Samples
   - No legend or annotation explaining the blue/yellow color threshold meaning
   - Could use ColumnDataSource for more idiomatic Bokeh code
+  image_description: 'The plot displays a hierarchical dendrogram visualizing clustering
+    of 15 iris flower samples across three species (Setosa, Versicolor, Virginica).
+    The tree structure uses a two-color scheme: blue (#306998) for high-level merges
+    above the color threshold and yellow/gold (#FFD43B) for lower-level within-cluster
+    merges. The Y-axis shows "Distance (Ward)" ranging from 0 to approximately 10,
+    with dashed grid lines. The X-axis is labeled "Sample" with 15 rotated labels
+    (45°) showing sample names like "Setosa-1", "Versicolor-4", "Virginica-3", etc.
+    The title "dendrogram-basic · bokeh · pyplots.ai" appears at the top left. The
+    dendrogram correctly shows Setosa samples clustering separately on the left, while
+    Versicolor and Virginica samples intermix on the right before merging at a higher
+    distance level - biologically accurate for iris data.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable; rotated sample
+          labels are slightly small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; rotated labels fit well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 makes dendrogram branches clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Distance (Ward)" is descriptive but "Sample" is generic'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Dashed grid is subtle (alpha 0.3), no legend needed but color meaning
+          unexplained
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/tree visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on X-axis, merge distances on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical structure, branch heights proportional to merge distances,
+          labels present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to max distance
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend required for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "dendrogram-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical merging at multiple levels, cluster separation
+          visible; could show more diversity in merge distances
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris flower species is a classic, real-world clustering example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 15 samples is within recommended 10-50 range; Ward distances are
+          realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → linkage → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (minor issue, but PNG is correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Manual rendering with Bokeh figure and line glyphs; generates HTML
+          output for interactivity; however, could use ColumnDataSource pattern more
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/highcharts.yaml b/plots/dendrogram-basic/metadata/highcharts.yaml
index 5dccadcf38..c6156c59c0 100644
--- a/plots/dendrogram-basic/metadata/highcharts.yaml
+++ b/plots/dendrogram-basic/metadata/highcharts.yaml
@@ -15,3 +15,179 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot displays a dendrogram (hierarchical clustering tree)
+    with a white background. The title "dendrogram-basic · highcharts · pyplots.ai"
+    appears at the top center. The Y-axis is labeled "Distance (Ward)" ranging from
+    0 to approximately 10.75, and the X-axis is labeled "Sample". The dendrogram shows
+    15 leaf nodes at the bottom representing iris flower samples: Setosa-4, Setosa-3,
+    Setosa-5, Setosa-1, Setosa-2, Versicolor-1, Versicolor-4, Versicolor-5, Versicolor-2,
+    Versicolor-3, Virginica-1, Virginica-3, Virginica-2, Virginica-4, and Virginica-5.
+    The tree structure is drawn in Python Blue (#306998) with clear U-shaped connections
+    showing the hierarchical merging pattern. The Setosa samples cluster together
+    on the left, while Versicolor and Virginica samples form a separate major cluster
+    on the right, with the two main clusters merging at the highest distance (~10).
+    The branch heights correctly represent the Ward linkage distances. Labels are
+    rotated at 45 degrees for readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 32
+      max: 35
+      items:
+      - id: VQ-01
+        name: Meaningful axis labels
+        score: 7
+        max: 7
+        passed: true
+        comment: '"Distance (Ward)" and "Sample" are descriptive'
+      - id: VQ-02
+        name: No overlapping text
+        score: 5
+        max: 6
+        passed: true
+        comment: Labels readable but slightly crowded at bottom
+      - id: VQ-03
+        name: Color choice
+        score: 5
+        max: 5
+        passed: true
+        comment: Python Blue (#306998) is colorblind-safe
+      - id: VQ-04
+        name: Clear data elements
+        score: 5
+        max: 5
+        passed: true
+        comment: Lines are thick (4px) and clearly visible
+      - id: VQ-05
+        name: Layout balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall but large bottom margin
+      - id: VQ-06
+        name: Grid subtlety
+        score: 3
+        max: 3
+        passed: true
+        comment: Subtle grid lines with low alpha
+      - id: VQ-07
+        name: Legend placement
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled
+      - id: VQ-08
+        name: Image size
+        score: 1
+        max: 2
+        passed: true
+        comment: 4785x2646, close to but not exactly 4800x2700
+    spec_compliance:
+      score: 33
+      max: 35
+      items:
+      - id: SC-01
+        name: Correct plot type
+        score: 10
+        max: 10
+        passed: true
+        comment: Dendrogram correctly rendered with tree structure
+      - id: SC-02
+        name: Data mapped correctly
+        score: 7
+        max: 7
+        passed: true
+        comment: Linkage matrix properly converted to visual coordinates
+      - id: SC-03
+        name: Required features present
+        score: 7
+        max: 7
+        passed: true
+        comment: Shows hierarchical clustering, merge distances, labels
+      - id: SC-04
+        name: Data range appropriate
+        score: 4
+        max: 4
+        passed: true
+        comment: Y-axis shows full distance range with padding
+      - id: SC-05
+        name: Legend accuracy
+        score: 2
+        max: 4
+        passed: true
+        comment: No legend needed for this plot type, but N/A
+      - id: SC-06
+        name: Title format correct
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses correct format
+    data_quality:
+      score: 15
+      max: 15
+      items:
+      - id: DQ-01
+        name: Feature coverage
+        score: 6
+        max: 6
+        passed: true
+        comment: Shows multiple cluster levels, varying merge distances
+      - id: DQ-02
+        name: Realistic context
+        score: 5
+        max: 5
+        passed: true
+        comment: Iris flower species is a classic clustering example
+      - id: DQ-03
+        name: Appropriate scale
+        score: 4
+        max: 4
+        passed: true
+        comment: 15 samples, 3 species groups, sensible measurements
+    code_quality:
+      score: 13
+      max: 15
+      items:
+      - id: CQ-01
+        name: KISS structure
+        score: 4
+        max: 4
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducible
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Library idioms
+        score: 3
+        max: 3
+        passed: true
+        comment: Proper use of highcharts-core API
+      - id: CQ-04
+        name: Clean imports
+        score: 1
+        max: 2
+        passed: true
+        comment: LineSeries imported from area module (unusual but works)
+      - id: CQ-05
+        name: Helpful comments
+        score: 1
+        max: 1
+        passed: true
+        comment: Data generation sections documented
+      - id: CQ-06
+        name: No deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-07
+        name: Output correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png AND plot.html (html is fine, but screenshot method
+          differs from standard)
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/letsplot.yaml b/plots/dendrogram-basic/metadata/letsplot.yaml
index 6eb593ba1b..c8460d73c4 100644
--- a/plots/dendrogram-basic/metadata/letsplot.yaml
+++ b/plots/dendrogram-basic/metadata/letsplot.yaml
@@ -25,3 +25,175 @@ review:
     should start at 0 or slightly below label positions
   - X-axis label Sample is generic; could be more descriptive like Iris Samples or
     simply removed since individual labels are shown
+  image_description: 'The plot displays a hierarchical dendrogram showing clustering
+    of 15 iris flower samples across three species (Setosa, Versicolor, Virginica).
+    The tree structure uses two colors: blue (#306998) for the top-level merge connecting
+    the Setosa cluster to the Versicolor/Virginica cluster at distance ~10, and yellow/gold
+    (#FFD43B) for all lower-level merges. The Y-axis shows "Distance (Ward)" ranging
+    from -2 to 12, and the X-axis shows "Sample" with individual sample labels (e.g.,
+    Setosa-1, Versicolor-3, Virginica-5) displayed at a 35-degree angle. The title
+    "dendrogram-basic · letsplot · pyplots.ai" appears at the top. The hierarchical
+    structure clearly shows Setosa samples clustering together on the left, while
+    Versicolor and Virginica samples form a separate major branch on the right, which
+    is scientifically accurate for iris data.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Sample labels are angled to avoid overlap, all text is readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line segments are appropriately sized (1.5 width), colors are distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall, but Y-axis extends into negative values unnecessarily
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Distance (Ward)" is descriptive, but "Sample" is generic'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid on Y-axis only is appropriate, but X-axis grid lines removed
+          while labels remain could be cleaner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/hierarchical clustering visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on X-axis, distance on Y-axis, correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Tree structure, branch heights proportional to merge distances, labels
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, complete tree structure shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, colors are self-explanatory (threshold-based)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "dendrogram-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure, different merge distances, cluster
+          separation; could show more varied within-cluster distances
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris flower species clustering is a classic, well-known example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Ward distances are reasonable, though the spread is quite compressed
+          at lower levels
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → linkage → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' but with path='.' parameter which is redundant
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_segment, geom_text, scale_color_manual, and theme
+          customization. However, doesn't leverage lets-plot's interactive features
+          in the HTML export or more advanced theming capabilities.
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/matplotlib.yaml b/plots/dendrogram-basic/metadata/matplotlib.yaml
index 7364d5db76..9693377807 100644
--- a/plots/dendrogram-basic/metadata/matplotlib.yaml
+++ b/plots/dendrogram-basic/metadata/matplotlib.yaml
@@ -25,3 +25,174 @@ review:
     figure/axes setup
   - Y-axis label could be more descriptive (e.g., Ward Linkage Distance or include
     clustering interpretation hints)
+  image_description: 'The plot displays a dendrogram (hierarchical clustering tree)
+    with 15 iris flower samples on the x-axis and Ward distance on the y-axis (ranging
+    0-10). The tree structure shows three distinct color-coded clusters: orange branches
+    for Setosa samples (left side), green branches for Versicolor and Virginica samples
+    (center and right). The blue horizontal line at the top connects the Setosa cluster
+    to the Versicolor/Virginica cluster at distance ~10. Sample labels are rotated
+    45 degrees and clearly readable. The title "dendrogram-basic · matplotlib · pyplots.ai"
+    appears at the top. The plot has a clean white background with subtle dashed horizontal
+    grid lines on the y-axis. Top and right spines are removed for a cleaner appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 14-16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Sample labels are rotated 45° preventing any overlap, all text fully
+          readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dendrogram branches are clearly visible with good line weights
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Orange, green, and blue are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of 16:9 aspect ratio, well-proportioned margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Sample" and "Distance (Ward)" are descriptive but y-axis could
+          include units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid on y-axis only (alpha=0.3), no legend needed for
+          this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/hierarchical clustering visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on x-axis, merge distances on y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses scipy.cluster.hierarchy, shows branch heights proportional to
+          distances, vertical orientation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 samples visible, full distance range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; cluster colors are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "dendrogram-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure with clear cluster separations at different
+          distance levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Iris flower species is a classic, well-known clustering example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 15 samples (within recommended 10-50), realistic iris measurements
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → linkage → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, and scipy.cluster.hierarchy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses scipy's dendrogram function directly; matplotlib is used only
+          for figure setup and styling, not for any distinctive matplotlib-specific
+          visualization features
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/plotly.yaml b/plots/dendrogram-basic/metadata/plotly.yaml
index 5e767e2605..0486bb22a5 100644
--- a/plots/dendrogram-basic/metadata/plotly.yaml
+++ b/plots/dendrogram-basic/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
   - Missing legend to explain what the cluster colors represent
   - Unconventional inline lambda import for scipy linkage function
   - Could add hover information to display sample details interactively
+  image_description: 'The plot displays a dendrogram (hierarchical clustering tree)
+    with a clean white background. The title "dendrogram-basic · plotly · pyplots.ai"
+    is centered at the top in black text. The y-axis shows "Distance (Ward)" ranging
+    from 0 to ~10, and the x-axis is labeled "Iris Flower Samples" with 15 sample
+    names rotated at -45 degrees. The dendrogram uses three distinct colors to highlight
+    clusters: green for Setosa samples (leftmost cluster), red/coral for Virginica
+    samples (middle), and cyan/teal for Versicolor samples (rightmost). The tree structure
+    clearly shows how samples merge at different distance levels, with the Setosa
+    cluster being most distinct (joining the others at distance ~10), while Virginica
+    and Versicolor clusters merge at distance ~4. Line widths are thick enough for
+    good visibility.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels slightly
+          small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; rotated x-axis labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 makes branches clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Three distinct colors (green, red, cyan) are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; bottom margin accommodates rotated labels well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Distance (Ward)" and "Iris Flower Samples"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid visible (acceptable for dendrogram), but no legend explaining
+          cluster colors
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/hierarchical clustering visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels on x-axis, distances on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchical structure, merge distances, proper tree layout
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 samples visible, full distance range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Cluster coloring is self-explanatory from labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "dendrogram-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear clustering hierarchy with three distinct groups; could
+          show more variation in merge distances within clusters
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris flower dataset is a classic clustering example; species names
+          are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Ward distances are reasonable; measurements simulate real iris dimensions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.figure_factory imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses inline lambda import which is unconventional
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses plotly's ff.create_dendrogram
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of figure_factory, but could leverage more interactivity
+          features or hover info
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/plotnine.yaml b/plots/dendrogram-basic/metadata/plotnine.yaml
index afb4539dab..4360689570 100644
--- a/plots/dendrogram-basic/metadata/plotnine.yaml
+++ b/plots/dendrogram-basic/metadata/plotnine.yaml
@@ -26,3 +26,175 @@ review:
   - The color threshold logic (0.7 * max) is somewhat arbitrary and unexplained visually
   - Bottom margin has extra whitespace due to rotated labels extending into the plot
     area
+  image_description: 'The plot displays a hierarchical dendrogram visualizing clustering
+    of 15 iris flower samples (5 each of Setosa, Versicolor, and Virginica). The tree
+    structure uses a two-color scheme: Python Blue (#306998) for high-level merges
+    at the top and Python Yellow (#FFD43B) for lower-level merges. The Y-axis shows
+    "Distance (Ward)" ranging from 0 to 12, and the X-axis is labeled "Sample". Sample
+    labels are displayed at 45-degree angles at the bottom (e.g., Setosa-4, Setosa-3,
+    Versicolor-1, Virginica-1, etc.). The title reads "dendrogram-basic · plotnine
+    · pyplots.ai". The layout is clean with subtle dashed grid lines on the Y-axis.
+    The dendrogram correctly shows Setosa samples clustering together on the left,
+    while Versicolor and Virginica samples cluster on the right side, reflecting their
+    biological similarity.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels are well-spaced with 45-degree rotation
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dendrogram branches clearly visible with size=1.8
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but slight extra whitespace at bottom due to label
+          rotation
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with method "Distance (Ward)" but no
+          units; X-axis just "Sample"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3 and dashed style; no legend needed
+          but color meaning unexplained
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/hierarchical clustering visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch heights correctly represent merge distances
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchical structure, merge distances, labeled leaves
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of distances
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color distinction is decorative
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "dendrogram-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows species clustering well; could show more variation in merge
+          heights within species
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris flower classification is a classic, realistic ML scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Ward distances are reasonable; values make sense for normalized data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → linkage → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Missing bbox_inches equivalent for tight layout
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_segment, geom_text, theme customization, and scale_color_manual.
+          However, relies heavily on scipy for dendrogram coordinates rather than
+          showcasing more plotnine-specific grammar of graphics features.
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/pygal.yaml b/plots/dendrogram-basic/metadata/pygal.yaml
index 7f815b2f36..35b87aa7e3 100644
--- a/plots/dendrogram-basic/metadata/pygal.yaml
+++ b/plots/dendrogram-basic/metadata/pygal.yaml
@@ -25,3 +25,177 @@ review:
     lines are acceptable but could use lower opacity
   - The dendrogram branches in the lower portion are somewhat cramped compared to
     the upper sparse area - inherent to the data distribution
+  image_description: The plot displays a well-formed dendrogram showing hierarchical
+    clustering of 15 iris flower samples across three species (Setosa, Versicolor,
+    Virginica). The visualization uses Python Blue (#306998) for all dendrogram branches
+    against a clean white background. The title "dendrogram-basic · pygal · pyplots.ai"
+    appears at the top in a readable font. The y-axis is labeled "Distance (Ward's
+    Method)" ranging from 0 to ~10, and the x-axis is labeled "Sample" with 15 rotated
+    sample labels (45°). The tree structure clearly shows three main clusters corresponding
+    to the three iris species, with Setosa samples clustering together on the left,
+    followed by Versicolor in the middle, and Virginica on the right. Branch heights
+    accurately reflect merge distances from Ward's method, with the final merge at
+    distance ~10 connecting the Setosa cluster to the Versicolor-Virginica supercluster.
+    Horizontal grid lines (y-guides) are subtle and aid in reading distance values.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; x-axis tick labels are
+          well-sized with appropriate rotation
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; 45° rotation prevents label collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dendrogram branches are clearly visible with good stroke width (5px)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; slight excess whitespace in upper portion but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with method "Distance (Ward's Method)";
+          X-axis labeled "Sample"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed for this plot type; the horizontal dotted y-guides
+          are reasonably subtle but could be fainter still (alpha appears ~0.4-0.5)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/hierarchical clustering visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on x-axis, merge distances on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchical structure, merge heights, and sample labels as
+          specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, appropriate y-axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for dendrogram (no legend required)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "dendrogram-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows hierarchical clustering with clear species separation; could
+          show more varied merge distances within clusters
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris flower measurements - classic, realistic dataset for clustering
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Ward's method distances are reasonable; values appropriate for normalized
+          biological measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → linkage → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, scipy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct outputs)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creative use of XY chart with manual segment drawing; custom Style
+          configuration; however, this is a workaround since pygal lacks native dendrogram
+          support
+  verdict: APPROVED
diff --git a/plots/dendrogram-basic/metadata/seaborn.yaml b/plots/dendrogram-basic/metadata/seaborn.yaml
index 97c820d08f..0cd3849c61 100644
--- a/plots/dendrogram-basic/metadata/seaborn.yaml
+++ b/plots/dendrogram-basic/metadata/seaborn.yaml
@@ -24,3 +24,174 @@ review:
   weaknesses:
   - Legend color markers use squares but appear slightly different shade than the
     x-axis label colors (minor visual inconsistency)
+  image_description: 'The plot displays a hierarchical dendrogram visualizing clustering
+    of 30 iris flower samples (10 from each species: Setosa, Versicolor, and Virginica).
+    The dendrogram uses Ward linkage distances on the y-axis (ranging from 0 to ~15).
+    The tree structure clearly shows Setosa samples clustering together on the left
+    with low merge distances (~1.5), while Versicolor and Virginica samples cluster
+    together on the right with higher merge distances (~6-7), reflecting that these
+    two species are more similar to each other than to Setosa. Branch colors correspond
+    to clusters, with orange branches for Setosa and green branches for the Versicolor/Virginica
+    clusters. X-axis labels are rotated 45 degrees and color-coded by species (teal/blue
+    for Setosa, orange for Versicolor, green for Virginica). A legend in the upper
+    right identifies the species. The title follows the required format "dendrogram-basic
+    · seaborn · pyplots.ai". Grid lines are subtle and present only on the y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 14-16pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels rotated 45°, no overlapping text
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dendrogram branches well-sized, species-colored labels enhance visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn colorblind palette, teal/orange/green distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, slight asymmetry due to clustering pattern (natural)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive: "Iris Samples (by Species)" and "Distance (Ward Linkage)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend uses blue square markers but Setosa labels appear teal/blue
+          in plot; slight color mismatch
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dendrogram/hierarchical clustering visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on x-axis, merge distances on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels, linkage matrix, branch heights proportional to distances
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full y-axis range shown (0-15)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly identifies three iris species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "dendrogram-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure with varying merge distances, clear
+          species clustering
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic iris dataset, meaningful biological clustering example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 30 samples within recommended 10-50 range, Ward distances realistic
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: sklearn.datasets imported but could use simpler approach; seaborn
+          not heavily utilized
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn styling
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_theme, sns.set_context, sns.color_palette, sns.despine;
+          however, core dendrogram is from scipy (seaborn has no native dendrogram),
+          seaborn primarily used for theming and aesthetics
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/altair.yaml b/plots/density-basic/metadata/altair.yaml
index 955bc1a2bc..dc29d2261c 100644
--- a/plots/density-basic/metadata/altair.yaml
+++ b/plots/density-basic/metadata/altair.yaml
@@ -23,3 +23,172 @@ review:
   - Some test score values extend beyond typical 0-100 range due to normal distribution
     tails
   - Rug plot (optional per spec) not included
+  image_description: 'The plot displays a bimodal density curve (KDE) in Python Blue
+    (#306998) with a filled area and darker stroke outline. The title "density-basic
+    · altair · pyplots.ai" appears at the top in appropriate font size. The X-axis
+    shows "Test Score (points)" ranging from approximately 10 to 110, and the Y-axis
+    displays "Probability Density" from 0.000 to 0.026. Two distinct peaks are visible:
+    a primary peak around 34-36 and a secondary peak around 66-70, demonstrating clear
+    bimodal distribution. The area under the curve has 70% opacity fill with a subtle
+    grid in the background. All text elements (title, axis labels, tick labels) are
+    clearly legible.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve clearly visible with good opacity and stroke
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though slight extra whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Test Score (points)", "Probability
+          Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but no legend present (N/A for single
+          series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density/KDE plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X correctly shows values, Y shows probability density
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth curve, fill under curve with transparency as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "density-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Bimodal distribution demonstrates density estimation well, though
+          rug plot (optional) not included
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores from two groups is a plausible, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 10-100 sensible for test scores, though some values extend
+          beyond 100
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (output is correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses transform_density for KDE, declarative encoding, tooltips, interactive
+          HTML export
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/bokeh.yaml b/plots/density-basic/metadata/bokeh.yaml
index 4e964445b1..b005882cff 100644
--- a/plots/density-basic/metadata/bokeh.yaml
+++ b/plots/density-basic/metadata/bokeh.yaml
@@ -24,3 +24,173 @@ review:
   - Grid styling could be more prominent for better readability
   - The rug plot ticks could be slightly more visible (currently line_width=2 with
     alpha=0.5)
+  image_description: The plot displays a bimodal density curve (KDE) showing web service
+    response times in milliseconds. The main peak is around 150ms (fast responses)
+    with a secondary smaller peak around 280ms (slower responses). The curve is filled
+    with a semi-transparent blue color (#306998) with a solid blue outline. A rug
+    plot showing individual observations appears as small vertical lines along the
+    x-axis at the bottom. The title "density-basic · bokeh · pyplots.ai" is positioned
+    at the top left. X-axis is labeled "Response Time (ms)" and Y-axis is labeled
+    "Density". The background is a light gray (#fafafa) with subtle dashed grid lines.
+    The layout uses the full 16:9 landscape format.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve and rug plot are well-sized and visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" and "Density" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3); no legend is present, which is acceptable
+          for a single-series density plot, though the distribution could be labeled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct KDE/density plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable correctly mapped to density
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth curve with fill, rug plot included as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "density-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodality effectively, demonstrates distribution characteristics
+          well. Could show more extreme values/outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Web service response times is an excellent, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 50-400ms are realistic for web response times, though the
+          bimodal distribution is somewhat idealized
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → KDE calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and varea/line/segment glyphs correctly, but
+          doesn't leverage Bokeh's interactive features (hover tooltips, interactive
+          HTML output). For static PNG export, the implementation is solid but doesn't
+          showcase Bokeh's unique strengths
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/highcharts.yaml b/plots/density-basic/metadata/highcharts.yaml
index de42d6ba62..d87de5500b 100644
--- a/plots/density-basic/metadata/highcharts.yaml
+++ b/plots/density-basic/metadata/highcharts.yaml
@@ -25,3 +25,180 @@ review:
   - Rug plot markers could be slightly larger for better visibility at the plot scale
   - The bimodal distribution is not clearly visible as two distinct peaks (appears
     more as a single peak with shoulder)
+  image_description: The plot displays a density curve (Kernel Density Estimation)
+    representing height distribution in centimeters. The visualization uses a blue
+    gradient-filled area chart transitioning from a darker blue (rgba ~0.6 opacity)
+    at the top to a lighter blue (~0.1 opacity) at the bottom. The X-axis is labeled
+    "Height (cm)" ranging from approximately 136 to 212, and the Y-axis is labeled
+    "Density" ranging from 0 to about 0.041. A rug plot is displayed along the bottom
+    using yellow diamond markers to show individual observations. The title "density-basic
+    · highcharts · pyplots.ai" is prominently displayed at the top. A legend in the
+    top-right corner identifies "Density Curve" and "Observations (Rug)". The distribution
+    shows a bimodal characteristic (combining simulated male/female heights) that
+    manifests as a main peak around 168-170 cm with a shoulder extending toward higher
+    values around 178-180 cm.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          resolution with appropriate font sizes (72px title, 48px axis titles, 36px
+          labels)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend is well-positioned and doesn't
+          obscure data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Density curve is clearly visible with good line width (5px); rug
+          plot markers are visible but slightly small relative to the plot area
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and provide
+          excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate margins; no content cut-off
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Height (cm)" and "Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid opacity is 0.25 which is slightly too prominent; the grid appears
+          quite dense with many horizontal lines
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density/KDE plot type implemented as area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows height values, Y-axis shows density values correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth density curve with fill, rug plot for individual observations
+          as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range is visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Density Curve" and "Observations (Rug)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "density-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution (male/female heights mixture), though
+          the bimodality could be more pronounced; demonstrates smoothing well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Heights in centimeters with realistic distribution for human population
+          (165cm female, 178cm male means)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Height values are sensible (140-200cm range); density values are
+          correct probability densities
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → KDE calculation → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses AreaSeries with gradient fill, proper chart options configuration,
+          ScatterSeries for rug plot, interactive HTML export
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/letsplot.yaml b/plots/density-basic/metadata/letsplot.yaml
index 90415f5650..6e7a9a2a9b 100644
--- a/plots/density-basic/metadata/letsplot.yaml
+++ b/plots/density-basic/metadata/letsplot.yaml
@@ -23,3 +23,174 @@ review:
   - Test score values exceeding 100 are unrealistic for typical grading scales
   - Could benefit from a rug plot to show individual observations as suggested in
     spec notes
+  image_description: The plot displays a smooth kernel density estimation curve showing
+    the distribution of test scores. The x-axis shows "Test Score" ranging from approximately
+    30 to 120, while the y-axis shows "Density" ranging from 0 to about 0.026. The
+    density curve is filled with Python blue (#306998) with appropriate transparency,
+    and the outline is the same color. The distribution is bimodal, with a primary
+    peak around 75 and a secondary peak around 88, reflecting the mixture of a main
+    student group and high achievers. The title "density-basic · letsplot · pyplots.ai"
+    is positioned at the top. The background is clean with subtle gray gridlines on
+    a minimal theme. The overall layout has good proportions in 16:9 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve is smooth and well-visible with good fill transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no accessibility issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units (e.g., "Test Score (points)" would
+          be better)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Minor gridlines disabled which is good, but major grid alpha could
+          be slightly more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density/KDE plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable correctly mapped to x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth curve with fill under curve as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single variable
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "density-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution demonstrating skewness and multiple peaks,
+          but no rug plot (optional per spec)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores is a perfect real-world scenario for density plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Good range (30-120), though some extreme values exceed 100 which
+          is unusual for test scores
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html, but uses export_ggsave from lets_plot.export
+          instead of standard ggsave
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot2 grammar correctly with geom_density, theme_minimal,
+          and proper ggsize for scaling, but doesn't leverage lets-plot specific interactive
+          features or advanced density options
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/matplotlib.yaml b/plots/density-basic/metadata/matplotlib.yaml
index 46b8d9c49f..9e40e1aa52 100644
--- a/plots/density-basic/metadata/matplotlib.yaml
+++ b/plots/density-basic/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
   - X-axis label could include units for full marks (e.g., Test Score (points))
   - Relies on scipy.stats.gaussian_kde rather than exploring matplotlib-native density
     plotting options
+  image_description: The plot displays a kernel density estimation (KDE) curve for
+    test scores. The main distribution is shown as a smooth blue curve (#306998) with
+    semi-transparent blue fill. The density peaks around 72-75 representing the main
+    student group, with a visible shoulder/secondary mode around 45 from the lower-performing
+    group, demonstrating the slight bimodality. A rug plot at the bottom shows individual
+    observations as vertical tick marks in matching blue. The title "density-basic
+    · matplotlib · pyplots.ai" is centered at the top. X-axis labeled "Test Score"
+    ranges from ~20 to ~100+, Y-axis labeled "Density" ranges from 0 to ~0.026. The
+    plot has clean styling with removed top/right spines and subtle dashed gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve and rug plot clearly visible with appropriate linewidth
+          (3) and alpha (0.4)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue), excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned with tight_layout, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels ("Test Score", "Density") but no units on x-axis
+          (could be "Test Score (points)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3) which is good, but no legend present (though
+          not strictly needed for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density/KDE plot implementation using scipy.stats.gaussian_kde
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, density to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has smooth density curve, fill under curve with transparency, rug
+          plot as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate x-range extension
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "density-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows bimodality (main peak + lower group), left-skewed distribution,
+          outliers visible in rug
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores is a real, comprehensible scenario; bimodal distribution
+          is realistic for student performance
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are realistic (0-100 test scores), but clipping to 0-100 after
+          normal generation creates slight artifacts at boundaries
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses scipy for KDE rather than matplotlib-native features. Good use
+          of fill_between, spines removal, but could leverage more matplotlib-specific
+          styling
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/plotly.yaml b/plots/density-basic/metadata/plotly.yaml
index c153d96373..726743906d 100644
--- a/plots/density-basic/metadata/plotly.yaml
+++ b/plots/density-basic/metadata/plotly.yaml
@@ -27,3 +27,176 @@ review:
     generic "Test Score")
   - Does not leverage Plotly's built-in density/histogram functions (ff.create_distplot
     or histogram with histnorm)
+  image_description: The plot shows a density curve (KDE) for test scores ranging
+    from approximately 30 to 120. The curve is rendered in a blue color (#306998)
+    with a semi-transparent light blue fill under the curve. The distribution is slightly
+    bimodal with a main peak around 75-78 and a secondary shoulder/peak around 85-88.
+    A rug plot is displayed along the x-axis showing individual observations as vertical
+    gray tick marks - these are densely clustered in the 60-95 range with sparse outliers
+    below 50 and above 100. The title "density-basic · plotly · pyplots.ai" is centered
+    at the top. Axis labels are "Test Score" (x-axis) and "Density" (y-axis). A legend
+    in the upper right shows "Density" (line) and "Observations" (tick mark). The
+    background uses the plotly_white template with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all excellently
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve line width of 4 is clearly visible, fill with appropriate
+          transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with adequate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units (e.g., "Test Score (points)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.2 is subtle, legend well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density/KDE plot with smooth curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, density on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes fill under curve with transparency, rug plot as suggested
+          in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Density curve and Observations
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format `density-basic · plotly · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution demonstrates distribution shape detection capability,
+          shows skewness and modality as mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Test scores is a plausible scenario but could be more specific (e.g.,
+          "SAT Math Scores", "Final Exam Scores")
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores in 40-110 range are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html but implementation manually computes
+          KDE rather than using library functions
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of hover templates for interactivity, HTML export for interactive
+          version. However, manually computes KDE instead of using plotly's built-in
+          histogram with histnorm='probability density' or ff.create_distplot. Still
+          uses plotly features like fill='tozeroy' and custom hover templates.
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/plotnine.yaml b/plots/density-basic/metadata/plotnine.yaml
index cc7c24eb19..730130bcd4 100644
--- a/plots/density-basic/metadata/plotnine.yaml
+++ b/plots/density-basic/metadata/plotnine.yaml
@@ -25,3 +25,177 @@ review:
   weaknesses:
   - Rug plot color (yellow) could have better contrast against the white background
     for accessibility
+  image_description: 'The plot displays a smooth density curve representing the distribution
+    of test scores. The main density area is filled with a semi-transparent blue color
+    (#306998) with a matching blue outline. The distribution shows clear bimodality:
+    a primary peak around 72 points and a secondary bump around 88 points, representing
+    the "high achievers" group. A yellow/gold rug plot along the x-axis shows individual
+    observations as small vertical lines. The x-axis is labeled "Test Score (points)"
+    ranging from 30 to 100, and the y-axis shows "Probability Density" ranging from
+    0.00 to 0.03. The title "density-basic · plotnine · pyplots.ai" is prominently
+    displayed at the top. The plot uses a clean minimal theme with subtle gray grid
+    lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve well-sized with appropriate fill alpha (0.6), rug marks
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue density is colorblind-safe; yellow rug could have slightly better
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content, appropriate whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Test Score (points)" and "Probability
+          Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha 0.3, no legend needed for single distribution
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density/KDE plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable correctly mapped to x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth curve ✓, fill with transparency ✓, rug plot ✓, appropriate
+          bandwidth
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single distribution, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "density-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution showcases distribution characteristics (skewness,
+          modality) as mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores is a compelling real-world scenario mentioned in spec
+          examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores between 0-100 are sensible; 200 observations is within recommended
+          range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine's grammar of graphics with geom_density + geom_rug
+          layering, theme_minimal, and element_text sizing. Could leverage additional
+          plotnine features like annotate or faceting for bonus.
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/pygal.yaml b/plots/density-basic/metadata/pygal.yaml
index 0ddca3456f..6b7f84048a 100644
--- a/plots/density-basic/metadata/pygal.yaml
+++ b/plots/density-basic/metadata/pygal.yaml
@@ -26,3 +26,169 @@ review:
     optimal readability
   - Manual KDE computation works but does not leverage any pygal-specific statistical
     features
+  image_description: The plot displays a smooth density curve (KDE) filled with a
+    blue color (#306998) on a white background. The curve represents test score distribution,
+    peaking around 75-77 points with a maximum density of approximately 0.041. A secondary
+    shoulder is visible around 60-65 points, demonstrating the left-skewed distribution
+    as intended. A rug plot shows individual observations as small vertical marks
+    along the x-axis between approximately 50-100. The title "density-basic · pygal
+    · pyplots.ai" appears at the top. X-axis is labeled "Test Score (points)" ranging
+    from ~40 to ~105, and Y-axis shows "Probability Density" from 0 to ~0.04. Subtle
+    horizontal grid lines aid readability.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; font sizes are appropriate
+          for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Density curve is clearly visible with good fill; rug marks could
+          be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe with good contrast against white
+          background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions overall; slight extra whitespace on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Test Score (points)" and "Probability
+          Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle horizontal grid lines; no legend needed (single series), but
+          y-guides are faint
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct KDE/density plot with smooth curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows continuous variable (test scores), Y-axis shows probability
+          density
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes smooth density curve, filled area with transparency, and
+          rug plot as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes appropriately show all data with padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series density plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "density-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows left-skewed distribution with bimodal hint; demonstrates KDE
+          smoothing well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores is a real, comprehensible scenario with plausible distribution
+          shape
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Test scores in 40-100 range are realistic; density values are correct
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → KDE computation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pygal, Style'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves to plot.png but also plot.html (correct, but minor)
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/density-basic/metadata/seaborn.yaml b/plots/density-basic/metadata/seaborn.yaml
index 3b789f1597..2a6a0c2f20 100644
--- a/plots/density-basic/metadata/seaborn.yaml
+++ b/plots/density-basic/metadata/seaborn.yaml
@@ -25,3 +25,172 @@ review:
     rather than two distinct peaks
   - Grid could use slightly lower alpha (0.2 instead of 0.3) for even more subtle
     appearance
+  image_description: The plot displays a kernel density estimation (KDE) curve showing
+    the distribution of test scores. The curve is filled with a semi-transparent blue
+    color (#306998) and has a thicker outline. The plot shows a bimodal distribution
+    with a main peak around 72-75 points and a secondary shoulder/peak around 88-90
+    points. A rug plot is visible along the x-axis showing individual data points
+    as small vertical lines. The title "density-basic · seaborn · pyplots.ai" is displayed
+    at the top. The x-axis is labeled "Test Score (points)" and the y-axis is labeled
+    "Density". A subtle dashed grid is present in the background. The layout is clean
+    with good proportions in 16:9 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curve clearly visible with appropriate fill alpha and line
+          width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content, balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("Test Score (points)", "Density")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle at alpha=0.3 with dashed style (good), but no legend
+          present (not strictly needed for single-series density)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct KDE/density plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable correctly visualized
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes filled curve with transparency, rug plot showing individual
+          observations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full distribution visible from ~30 to ~120
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "density-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution demonstrating skewness/multimodality as
+          mentioned in spec, but the bimodality is somewhat subtle (appears more as
+          a shoulder)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores is a perfect realistic context for density plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores ranging ~40-110 are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib.pyplot, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's `kdeplot` with fill parameter and `rugplot` - both
+          are seaborn-specific statistical visualization functions
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/altair.yaml b/plots/donut-basic/metadata/altair.yaml
index 07caabcc98..1af5fe4275 100644
--- a/plots/donut-basic/metadata/altair.yaml
+++ b/plots/donut-basic/metadata/altair.yaml
@@ -25,3 +25,169 @@ review:
     be more distinct
   - Percentage labels on very small segments may be harder to read at smaller display
     sizes
+  image_description: 'The plot displays a donut chart with 5 colored segments representing
+    budget allocation by department. The segments are: Marketing (28%, dark blue),
+    Development (35%, yellow), Operations (18%, light blue), Sales (12%, gray), and
+    Support (7%, sage green). White percentage labels appear on each segment. The
+    hollow center contains "Total: $100M" in blue text. The title "donut-basic · altair
+    · pyplots.ai" is positioned at the top center. A legend with category names and
+    colored circles is placed on the right side. White stroke separates segments for
+    clarity.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, legend, and center text are clearly readable. Percentage labels
+          on segments are legible but slightly small on smaller segments.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-positioned.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All segments are clearly visible with good proportions. White stroke
+          separation enhances clarity.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color differentiation; blue/yellow/gray/green palette is mostly
+          colorblind-safe, though the two blues (Marketing and Operations) could be
+          more distinct.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-centered donut with appropriate proportions, good use of whitespace.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed on the right, clear and appropriately sized.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut/ring chart with hollow center.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to segments by value.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has percentage labels on segments, center text with key metric, consistent
+          segment ordering.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly matches segment colors and labels.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "donut-basic · altair · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying segment sizes well (7% to 35%), demonstrates donut
+          chart effectively. Could show more extreme size variations.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic, relatable business
+          scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for budget allocation. 5 categories is appropriate
+          (spec says 3-8).
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart → layers → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair''s declarative grammar: mark_arc for donut,
+          layering for text overlays, tooltips for interactivity, proper encoding
+          with theta channel.'
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/bokeh.yaml b/plots/donut-basic/metadata/bokeh.yaml
index 32341d0fcd..cf42cd1f5f 100644
--- a/plots/donut-basic/metadata/bokeh.yaml
+++ b/plots/donut-basic/metadata/bokeh.yaml
@@ -25,3 +25,170 @@ review:
     of duplicating percentages visible on segments
   - 'Two similar teal colors (#4ECDC4 and #95E1D3) could be harder to distinguish
     for some users'
+  image_description: 'The plot displays a donut chart (ring chart) showing portfolio
+    allocation across 5 asset classes. The chart uses a square 3600x3600 format with
+    the donut centered slightly left. Colors used are: Python Blue (#306998) for Technology
+    (35%), Yellow (#FFD43B) for Healthcare (25%), Teal (#4ECDC4) for Finance (20%),
+    Coral/Red (#FF6B6B) for Energy (12%), and Light Teal (#95E1D3) for Retail (8%).
+    Percentage labels are positioned within each segment. The center displays "Total"
+    with "100" below it in Python Blue. A legend on the right side shows each category
+    with its percentage. The title "donut-basic · bokeh · pyplots.ai" appears at the
+    top left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; title 36pt, segment labels 26pt, center text 32-48pt,
+          legend 22pt - all appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Donut segments well-sized with thick ring width (0.45-0.95 radius),
+          good visual weight
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast and mostly colorblind-safe palette, though some similar
+          teal shades could be improved
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with chart centered and legend positioned to the
+          right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right side with clear color boxes
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut/ring chart using annular_wedge
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to segments, values to angles
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Center space used for key metric (Total: 100), percentage labels
+          on segments, 5 categories (within 3-8 range)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible and properly proportioned
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match categories and show percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "donut-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied segment sizes (35% down to 8%), demonstrates donut chart
+          well, though could show more extreme size differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Portfolio allocation by asset class is a real, comprehensible scenario
+          perfectly suited for donut charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sum to 100% appropriately; percentages are realistic for portfolio
+          allocation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values used)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (math.pi, numpy, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but legend labels show "(35%)" which
+          duplicates the value display
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, cumsum transform, annular_wedge which are
+          Bokeh-specific; exports both PNG and HTML
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/highcharts.yaml b/plots/donut-basic/metadata/highcharts.yaml
index be8b73e364..a15a58af33 100644
--- a/plots/donut-basic/metadata/highcharts.yaml
+++ b/plots/donut-basic/metadata/highcharts.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Legend text appears slightly small relative to the chart size on the right side
   - 'Center text could be better vertically centered (currently offset with y: 60)'
+  image_description: 'The plot displays a donut chart showing budget allocation across
+    5 categories. The chart has a hollow center displaying "Total $100M". Segments
+    are colored with a colorblind-safe palette: blue (Marketing 28%), yellow (Development
+    35%), purple (Operations 18%), cyan (Research 12%), and brown (Support 7%). Each
+    segment has clear percentage labels positioned outside the ring. The title "donut-basic
+    · highcharts · pyplots.ai" appears at the top. A vertical legend on the right
+    lists all categories with corresponding color indicators. The layout is clean
+    with good whitespace balance.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, data labels, and legend are all clearly readable at the 4800x2700
+          resolution with appropriate font sizes (48px title, 28px labels)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; data labels are well-spaced around the donut
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ring thickness (55% inner size) provides excellent segment visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses recommended colorblind-safe palette with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout but the donut is slightly left of center due
+          to right-aligned legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for pie/donut charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed and readable
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut chart (pie with inner hole)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has hollow center with summary (Total $100M), percentage labels on
+          segments, consistent segment ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying proportions demonstrating the donut
+          format well; could show more variation in segment sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a very realistic and relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible percentages; the total shown as $100M works
+          but data values (28, 35, etc.) are treated as percentages
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png correctly but also creates intermediate plot_raw.png
+          (cleaned up)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts PieSeries with innerSize for donut effect, allowPointSelect
+          for interactivity, proper subtitle positioning in center. Could leverage
+          more Highcharts-specific features like tooltips or animations in the HTML
+          output.
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/letsplot.yaml b/plots/donut-basic/metadata/letsplot.yaml
index 4a2807b27c..5b669426df 100644
--- a/plots/donut-basic/metadata/letsplot.yaml
+++ b/plots/donut-basic/metadata/letsplot.yaml
@@ -21,3 +21,166 @@ review:
   weaknesses:
   - Legend could be positioned closer to the chart for better visual cohesion
   - No explicit random seed (though data is deterministic, best practice to include)
+  image_description: 'The plot displays a donut chart showing budget allocation by
+    department. The chart has a hollow center containing "Total $100M" text. Five
+    colored segments represent different departments: Marketing (28.0%, Python blue
+    #306998), Operations (22.0%, yellow), R&D (25.0%, green), Sales (18.0%, orange),
+    and HR (7.0%, purple). Each segment has a percentage label positioned on the segment.
+    A legend titled "Department" appears on the right side listing all five categories
+    with corresponding color markers. The title "donut-basic · letsplot · pyplots.ai"
+    is centered at the top. The overall layout is clean with good whitespace balance.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, center label, and percentage labels are clearly readable.
+          Legend text is slightly small but acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all percentage labels are well-positioned on
+          segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Donut segments are appropriately sized with good ring width (hole=0.5)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with Python blue, yellow, green, orange,
+          purple - all distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; slight imbalance with legend positioned far right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, no grid needed for donut chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut chart with hollow center
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has percentage labels, center metric ($100M), 5 categories (within
+          3-8 range)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to department names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "donut-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety in segment sizes (7% to 28%), demonstrates donut features
+          well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic, relatable business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible; $100M total is realistic for a company budget
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), but no explicit seed statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses geom_pie with hole parameter, layer_labels for percentage formatting,
+          geom_label for center annotation, theme_void - good use of lets-plot features
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/matplotlib.yaml b/plots/donut-basic/metadata/matplotlib.yaml
index ef369320ee..ea3adca87f 100644
--- a/plots/donut-basic/metadata/matplotlib.yaml
+++ b/plots/donut-basic/metadata/matplotlib.yaml
@@ -26,3 +26,169 @@ review:
     ability to compare proportions
   - Could benefit from slightly more contrast between adjacent segments (Operations
     light blue and Marketing blue are somewhat similar)
+  image_description: 'The plot displays a donut chart with 5 segments representing
+    budget allocation categories. The colors used are: blue (#306998) for Marketing
+    (25.0%), yellow (#FFD43B) for Development (35.0%), light blue (#5BA0D0) for Operations
+    (15.0%), sage green (#8FBC8F) for Sales (18.0%), and light pink (#DDA0DD) for
+    Support (7.0%). Each segment displays its percentage in white bold text positioned
+    within the ring. Category labels are placed outside the ring in black text. The
+    center of the donut contains "Total $100K" in blue text. The title "donut-basic
+    · matplotlib · pyplots.ai" appears at the top in dark text. The chart uses white
+    edge lines between segments for separation. Overall layout is balanced with good
+    use of the square canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, category labels at 20pt, percentage labels at 16pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ring width of 0.5 provides excellent visibility, white edges separate
+          segments clearly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinguishable and colorblind-safe (no red-green only
+          distinctions)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, square format appropriate for circular chart
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed, labels serve as legend effectively
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut/ring chart with hollow center
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Center text with total, percentage labels on segments, 5 categories
+          (within 3-8 range)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible and properly sized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Category labels correctly positioned and accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "donut-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying sizes, but could have more dramatic
+          size differences to better demonstrate comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world use case for donut charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sum to 100% which is correct; "$100K" total is plausible but
+          the "K" suffix with percentages that happen to sum to 100 is slightly confusing
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of wedgeprops for ring width and styling, pctdistance/labeldistance
+          for positioning. Could have used matplotlib's more advanced features like
+          shadow or explode.
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/plotly.yaml b/plots/donut-basic/metadata/plotly.yaml
index 728746ff53..d196d17cb2 100644
--- a/plots/donut-basic/metadata/plotly.yaml
+++ b/plots/donut-basic/metadata/plotly.yaml
@@ -22,3 +22,166 @@ review:
   weaknesses:
   - Legend positioning creates slight layout imbalance
   - Could leverage more Plotly interactivity features like custom hover templates
+  image_description: The plot displays a donut chart with 6 budget allocation categories
+    rendered as colored ring segments. The largest segment is Engineering (35%) in
+    Python blue (#306998), followed by Marketing (20%) in yellow, Operations (15%)
+    in teal, Sales (15%) in coral/salmon, R&D (10%) in green, and HR (5%) in orange.
+    Each segment has a slight pull/explosion effect and white borders between segments.
+    Labels with category names and percentages are positioned outside the ring. The
+    center contains "Total $100M" in blue text. A vertical legend on the right lists
+    all categories. The title "donut-basic · plotly · pyplots.ai" appears at the top
+    center.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, labels at 20pt, center annotation at 36pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All labels positioned outside the ring with no overlapping text
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ring segments are well-sized with good thickness (hole=0.5), slight
+          pull effect adds visual separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with distinct hues (blue, yellow, teal,
+          coral, green, orange)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall but the legend positioned far right creates some asymmetry
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, no grid needed for donut charts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut chart with hollow center
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has percentage labels, center metric ($100M total), consistent ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible, no data cut off
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match segment categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "donut-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 6 categories with varying sizes, demonstrates donut chart well
+          but could show more extreme size contrasts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation scenario is highly realistic and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values sum to 100 (representing $100M), percentages are plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Pie with hole parameter, add_annotation for center text,
+          and HTML export for interactivity. Could have used more Plotly-specific
+          features like hovertemplate customization.
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/pygal.yaml b/plots/donut-basic/metadata/pygal.yaml
index a613e702fb..d432944700 100644
--- a/plots/donut-basic/metadata/pygal.yaml
+++ b/plots/donut-basic/metadata/pygal.yaml
@@ -25,3 +25,166 @@ review:
   - Two pairs of similar colors (blues and yellows) reduce distinguishability between
     segments
   - Does not showcase pygal interactive/tooltip capabilities
+  image_description: 'The plot displays a donut chart showing budget allocation across
+    5 departments. The ring has a substantial hollow center (60% inner radius). Segments
+    are colored: Engineering (35%) in steel blue, Marketing (25%) in golden yellow,
+    Sales (20%) in lighter blue, Operations (12%) in pale yellow, and HR (8%) in gray.
+    Percentage values are displayed on each segment. The title "donut-basic · pygal
+    · pyplots.ai" appears at the top in dark text. A horizontal legend at the bottom
+    shows all 5 categories with colored squares. The overall layout is clean with
+    a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend are readable; percentage labels are clear but could
+          be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments are well-sized and clearly visible with good ring width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable; two similar blue shades (Engineering/Sales)
+          and two similar yellow shades (Marketing/Operations) could be improved
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, chart centered nicely
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed at bottom, clear category labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut chart (pie with inner_radius)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has hollow center, percentage labels on segments, consistent ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible, values sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: donut-basic · pygal · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied segment sizes (8-35%); missing center text/metric as
+          suggested in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic percentages; could have more variation in mid-range
+          segments
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → chart → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to plot.svg first, then plot.png (minor issue with also saving
+          .html)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization and inner_radius for donut, but
+          doesn't leverage tooltips or other interactive features that pygal excels
+          at
+  verdict: APPROVED
diff --git a/plots/donut-basic/metadata/seaborn.yaml b/plots/donut-basic/metadata/seaborn.yaml
index 09551fa404..740ac5d4d1 100644
--- a/plots/donut-basic/metadata/seaborn.yaml
+++ b/plots/donut-basic/metadata/seaborn.yaml
@@ -25,3 +25,160 @@ review:
     chart)
   - Could benefit from more extreme variation in segment sizes to better demonstrate
     donut chart capabilities
+  image_description: 'The plot displays a donut chart showing budget allocation across
+    6 departments. The ring uses seaborn''s Set2 color palette with distinct pastel
+    colors: orange for Engineering (28.1%), teal-green for Marketing (15.6%), yellow
+    for R&D (17.5%), light green for HR (7.5%), pink for Sales (20.0%), and blue for
+    Operations (11.2%). The center displays "Total $160,000" in bold dark text. Percentage
+    labels appear in white bold text inside each segment. Category labels are positioned
+    outside the ring. The title "donut-basic · seaborn · pyplots.ai" appears at the
+    top. White edge separators divide the segments. The chart uses a square aspect
+    ratio appropriate for circular visualizations.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and percentages are clearly readable; center text
+          is prominent
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All labels and percentages are well-positioned without overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Donut ring has appropriate width (0.5), segments clearly distinguished
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Set2 palette is generally colorblind-friendly, though some similar
+          tones
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of square canvas, donut well-centered
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct donut chart with hollow center
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Center text with total, percentage labels, consistent ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible and proportional
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Category labels accurate and positioned correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "donut-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 6 categories with varying proportions; could show more extreme
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for departmental budgets, though could include
+          smaller departments
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values), no random seed needed but
+          also no random data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib/seaborn APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's color_palette and set_theme, but donut chart is matplotlib-native;
+          seaborn doesn't have a dedicated donut/pie function
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/altair.yaml b/plots/donut-nested/metadata/altair.yaml
index a5c6461421..e54fb436a7 100644
--- a/plots/donut-nested/metadata/altair.yaml
+++ b/plots/donut-nested/metadata/altair.yaml
@@ -25,3 +25,173 @@ review:
   - Outer ring labels could be slightly larger for better readability at full size
   - No explicit legend for smaller segments that do not have labels
   - The Engineering label in the inner ring slightly clips at the edge of its segment
+  image_description: 'The plot displays a nested donut chart showing budget allocation
+    across 4 departments. The inner ring shows department totals (Engineering, Marketing,
+    Sales, Operations) with dark colors and white bold labels centered in each segment.
+    The outer ring shows 14 expense categories using lighter variations within each
+    department''s color family: Blue shades for Engineering (Salaries, Equipment,
+    Software, Training), Yellow/Gold for Marketing (Digital Ads, Events, Content),
+    Teal/Green for Sales (Commissions, Travel, Tools), and Purple shades for Operations
+    (Facilities, IT Support, Logistics, HR). White strokes separate all segments.
+    Labels appear on larger outer segments. The title "donut-nested · altair · pyplots.ai"
+    is positioned at the top center. The chart uses a square 1200x1200 canvas (scaled
+    3x to 3600x3600) and fills a good portion of the available space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and inner labels clearly readable, outer labels could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, segments well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Both rings clearly visible with good proportions and spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Color families are colorblind-safe with good contrast between categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight asymmetry due to label placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for donut charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed, tooltips available for details, clean presentation
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring shows parent categories, outer shows children with correct
+          aggregation
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has color families per parent, segment alignment, labels on larger
+          segments; spacing between rings present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible and proportionally represented
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit legend, relies on labels and tooltips only
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "donut-nested · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows hierarchy with 4 parents and 14 children, varying segment sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $K range (50-450K) are realistic for budget allocation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Flat script structure with imports, data, plot, save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random generation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layered chart approach with mark_arc, but could leverage
+          more declarative features like tooltips more prominently
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/bokeh.yaml b/plots/donut-nested/metadata/bokeh.yaml
index 387b8ab360..fd7f7e7f74 100644
--- a/plots/donut-nested/metadata/bokeh.yaml
+++ b/plots/donut-nested/metadata/bokeh.yaml
@@ -27,3 +27,169 @@ review:
     square is appropriate for donut
   - Missing HoverTool which would enhance interactivity in the HTML output
   - Inner ring label font size could be slightly larger given the segment sizes
+  image_description: 'The plot displays a nested donut chart showing budget allocation
+    data. The **inner ring** shows 4 department totals: Engineering ($800K, dark blue),
+    Marketing ($430K, golden yellow), Sales ($450K, green), and Operations ($330K,
+    purple/mauve). The **outer ring** shows expense categories within each department
+    using lighter shades of the parent color - Engineering has Salaries, Equipment,
+    Training, Cloud; Marketing has Advertising, Events, Content; Sales has Salaries,
+    Travel, Tools; Operations has Facilities, IT Support, Utilities. The center displays
+    "Total $2010K" in blue text. Title "donut-nested · bokeh · pyplots.ai" appears
+    at the top. White line separators divide segments. Labels show category names
+    and values in $K format. The chart uses a square 1:1 aspect ratio with good canvas
+    utilization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable, though some outer ring labels are slightly small
+          for the segment size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; smaller segments appropriately unlabeled
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Donut rings well-sized, good visual hierarchy between inner/outer
+          rings
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Color families work well, but yellow/green could be challenging for
+          some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, donut fills the space appropriately
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed; labels serve as legend effectively
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring = departments, outer ring = expense categories
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical data, color families, segment alignment, labels on larger
+          segments
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no cut-off
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels accurately reflect data values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "donut-nested · bokeh · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical relationship well; could show more variation in
+          segment sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $K are realistic for department budgets
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but uses math operations
+          correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of annular_wedge, ColumnDataSource, LabelSet; could leverage
+          HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/highcharts.yaml b/plots/donut-nested/metadata/highcharts.yaml
index 961d57531d..4a4656f3f7 100644
--- a/plots/donut-nested/metadata/highcharts.yaml
+++ b/plots/donut-nested/metadata/highcharts.yaml
@@ -26,3 +26,168 @@ review:
     could benefit from larger font sizes for better readability at full resolution
   - Spec suggests using a legend for smaller segments, but legend is disabled; this
     is a minor deviation from spec guidance
+  image_description: 'The plot displays a nested donut chart for annual budget allocation.
+    The **inner ring** shows 4 department segments: Engineering (dark blue, $4,500,000),
+    Marketing (yellow, $2,800,000), Operations (purple, $1,900,000), and Sales (cyan,
+    $2,200,000). Each department segment has bold white text labels with the department
+    name and dollar value positioned inside the segment. The **outer ring** contains
+    15 expense category segments, each colored with lighter shades matching their
+    parent department''s hue family (e.g., Engineering''s children use progressively
+    lighter blues for Salaries, Equipment, Training, Software). Outer ring labels
+    (expense names) are positioned outside the chart with connector lines. The chart
+    uses a square 3600×3600 format with a white background. The title at the top reads
+    "Annual Budget Allocation by Department · donut-nested · highcharts · pyplots.ai"
+    with a subtitle "Inner: Departments | Outer: Expense Categories".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 48px, subtitle 32px, inner labels 26px, outer labels 20px -
+          all readable, though some outer labels (Software, CRM Tools) are slightly
+          small for their segment size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels well-positioned inside (inner) and outside
+          (outer) the rings
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Both rings clearly visible with good sizing (inner 20-45%, outer
+          55-85%), white gap between rings provides excellent separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, purple, cyan families); no
+          red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Chart fills ~60% of canvas, good proportions, though slight top-heaviness
+          with title/subtitle
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled for cleaner look; subtitle explains the ring hierarchy
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring = departments (level_1), outer ring = expenses (level_2),
+          values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has consistent color families per parent, child segments align with
+          parent boundaries, includes labels; missing: no legend for smaller segments
+          as spec suggests'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, values aggregate correctly (Engineering children
+          sum to $4.5M)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correctly formatted: "{description} · donut-nested · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 4 parent categories with 3-4 children each, demonstrates varying
+          sizes within each family
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world scenario; department names
+          and expense categories are plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values range from $200K (CRM Tools) to $2.8M (Salaries) - realistic
+          corporate budget figures
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → chart config → series → HTML generation
+          → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded), but no random seed needed; however
+          the structure could be cleaner
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: highcharts_core, PIL, selenium, tempfile, urllib,
+          etc.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/letsplot.yaml b/plots/donut-nested/metadata/letsplot.yaml
index 7cbe986e23..3b3bc122c9 100644
--- a/plots/donut-nested/metadata/letsplot.yaml
+++ b/plots/donut-nested/metadata/letsplot.yaml
@@ -27,3 +27,176 @@ review:
     (prefer inline code)
   - No legend provided to explain the color scheme, relying solely on segment labels
   - Outer ring labels could include percentage values for better data interpretation
+  image_description: The plot displays a nested donut chart with two concentric rings.
+    The inner ring shows four parent departments (Marketing in blue, Operations in
+    yellow, R&D in green, Sales in orange), with bold white labels. The outer ring
+    displays child expense categories for each department using lighter shades of
+    the parent color family (blues for Marketing subcategories, yellows for Operations,
+    greens for R&D, oranges for Sales). Each segment has a dark text label. The center
+    of the donut displays "Total $100M" in bold black text. White borders separate
+    segments clearly. The title "donut-nested · letsplot · pyplots.ai" appears at
+    the top. The chart uses a square 1:1 aspect ratio with the donut well-centered.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text is readable; inner ring labels are bold white, outer ring
+          labels are dark and clear. Title is visible at top. Minor: some outer labels
+          could be slightly larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels are positioned in the middle of their
+          respective arcs without collision.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments are clearly visible with good proportions; white borders
+          separate segments excellently.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Colors are distinct: blue, yellow, green, orange families with lighter
+          shades for children. No red-green only distinctions.'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with donut well-centered. The chart uses appropriate
+          canvas area (~60-70%). Slight extra whitespace at bottom.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend provided. While segment labels are present, a legend showing
+          the color-to-category mapping would help interpretation.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring shows parent categories (level_1), outer ring shows children
+          (level_2) as specified.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: hierarchical display, color families
+          per parent, spacing between rings, labels on segments.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values visible and proportionally represented.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels directly on segments are accurate.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "donut-nested · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 parent categories with 2-3 children each. Good variety in
+          segment sizes showing different proportions. Could show percentage values
+          on segments for completeness.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation scenario is realistic and matches spec example
+          (department totals inner, expense categories outer).
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Total $100M is realistic for company budget. Individual values (4-18M)
+          are reasonable. Minor: would benefit from showing actual percentages.'
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Uses a function `create_wedge()` which violates pure KISS structure
+          (imports → data → plot → save without functions).
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (no random values), ensuring reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used; no unused imports.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly.
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good use of lets-plot''s ggplot2 grammar: geom_polygon for custom
+          shapes, coord_fixed for aspect ratio, scale_fill_manual for custom colors,
+          comprehensive theme customization.'
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/matplotlib.yaml b/plots/donut-nested/metadata/matplotlib.yaml
index 2848ba8608..e597c15f4a 100644
--- a/plots/donut-nested/metadata/matplotlib.yaml
+++ b/plots/donut-nested/metadata/matplotlib.yaml
@@ -12,10 +12,196 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/donut-nested/
 preview_html: null
 quality_score: 92
 review:
-  strengths: []
+  strengths:
+  - Excellent hierarchical color scheme with consistent color families per department
+    (hue for category, lightness for subcategories)
+  - Clear visual separation between inner and outer rings using white edges
+  - 'Smart labeling strategy: direct labels on larger segments (≥$100K), comprehensive
+    legend for all categories'
+  - Realistic and relatable budget allocation scenario that demonstrates the nested
+    donut concept effectively
+  - Clean code structure following KISS principles with well-organized data definitions
   weaknesses:
   - Legend is positioned slightly far from the chart, creating some visual disconnect
   - Inner ring labels could use a slightly larger font for better prominence at full
     resolution
   - Could include percentage values alongside dollar amounts to better show part-to-whole
     relationships
+  image_description: 'The plot displays a nested donut chart showing budget allocation
+    data. The inner ring shows 4 departments (Engineering $650K in dark blue, Marketing
+    $450K in yellow, Operations $330K in green, Sales $430K in red). The outer ring
+    shows expense categories within each department using lighter shades of the parent
+    department''s color. Labels with values are placed directly on larger segments.
+    A comprehensive legend on the right side lists all 12 expense categories grouped
+    by department (e.g., "Engineering: Salaries", "Marketing: Advertising"). The title
+    "donut-nested · matplotlib · pyplots.ai" appears at the top. White edge lines
+    separate all segments. The chart uses a square 1:1 aspect ratio format.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text is readable; inner ring labels are bold white and clear;
+          outer ring labels use appropriate sizing. Minor: some outer ring labels
+          could be slightly larger for perfect readability at full size.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned on their respective
+          segments.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Donut rings are well-proportioned with appropriate widths (0.4 inner,
+          0.35 outer); white edge lines provide clear separation.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct color families (blue, yellow, green, red) with varying
+          lightness for subcategories; colorblind-friendly differentiation through
+          both hue and lightness.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall balance; chart fills most of the canvas; legend is well-positioned
+          but slightly far from the chart.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is comprehensive with shadow box styling; clearly organized
+          by department.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring shows departments (parent categories), outer ring shows
+          expense categories (children); values aggregate correctly.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Consistent color families per department, labels on larger segments,
+          legend for smaller ones, spacing between rings via white edges.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data is visible; 4 parent categories with 3 children each (within
+          spec's 3-6 parents, 2-5 children).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Legend labels correctly show "Department: Category" format matching
+          the data.'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "donut-nested · matplotlib · pyplots.ai".'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows hierarchy with varying segment sizes; demonstrates aggregation
+          from outer to inner rings. Minor: could show more variation in segment sizes
+          within departments.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation scenario is realistic and relatable; departments
+          and expense categories are plausible business data.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Dollar values in thousands (K) are appropriate for budget allocation;
+          totals and proportions are sensible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → colors → figure → inner pie → outer
+          pie → labels → legend → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values, no random generation needed).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy are imported, both are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but figsize is 12x12 (3600x3600 at 300dpi), which
+          is correct for square format.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's pie() with wedgeprops for donut styling, manual
+          text annotation for labels, custom legend with Rectangle patches. Could
+          leverage more advanced features like ConnectionPatch for visual linking
+          or annotations.
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/plotly.yaml b/plots/donut-nested/metadata/plotly.yaml
index c13253ceb3..1e44fb6fd2 100644
--- a/plots/donut-nested/metadata/plotly.yaml
+++ b/plots/donut-nested/metadata/plotly.yaml
@@ -26,3 +26,170 @@ review:
   - Child segment alignment with parent boundaries could be visually verified more
     explicitly
   - Could benefit from pull effect or hover highlighting to emphasize interactivity
+  image_description: 'The plot displays a nested donut chart showing company budget
+    allocation. The chart consists of two concentric rings on a white background.
+    The inner ring shows four department categories (Engineering, Marketing, Sales,
+    Operations) with their percentage values displayed in white text. The outer ring
+    displays 13 expense subcategories with labels placed outside the ring. Colors
+    follow a family scheme: Engineering uses blues (deep navy to light blue), Marketing
+    uses yellows/golds, Sales uses teals/aquas, and Operations uses reds/corals. The
+    center of the donut contains bold text "$100M Total Budget". A legend on the right
+    side lists all categories. The title "Company Budget Allocation · donut-nested
+    · plotly · pyplots.ai" appears at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, labels at 16pt, all readable. Inner ring percentages
+          clear at 20pt white text. Slightly under ideal for outer labels.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, outer labels well spaced using outside positioning
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Both rings clearly visible with good proportions, white separator
+          lines between segments work well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent color family approach: blues, yellows, teals, reds are
+          distinguishable even with color vision deficiency'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, chart is well-centered with legend appropriately
+          placed on the right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed with clear labels, no grid needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring shows departments (parent), outer shows expense categories
+          (children)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Color families per parent, center annotation, proper hierarchy
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible, values aggregate correctly (45+25+18+12=100M)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All categories properly labeled in legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{topic} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure well; 4 parents with 3-4 children each.
+          Could show more varied segment sizes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget allocation is a perfect real-world scenario for nested
+          donuts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: $100M total with realistic department splits. Values are sensible
+          though somewhat generic.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → traces → layout → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also generates plot.html (correct for Plotly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Plotly features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Pie with domain positioning for nesting, custom hover templates.
+          Could leverage more interactive features or annotations.
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/plotnine.yaml b/plots/donut-nested/metadata/plotnine.yaml
index f87993e360..a3293fbad4 100644
--- a/plots/donut-nested/metadata/plotnine.yaml
+++ b/plots/donut-nested/metadata/plotnine.yaml
@@ -24,3 +24,181 @@ review:
   - Helper function slightly deviates from KISS principle but is necessary for the
     polygon-based approach
   - Outer ring labels could be slightly larger for better readability at full resolution
+  image_description: 'The plot displays a nested donut chart showing budget allocation
+    by department. The inner ring shows four parent categories: Engineering (blue),
+    Marketing (yellow), Operations (green), and Sales (red). Each parent has bold
+    white text labels. The outer ring shows child categories with lighter shades of
+    the parent colors: Engineering has Salaries, Equipment, and Training; Marketing
+    has Advertising, Events; Sales has Commissions, Travel; Operations has IT Infrastructure,
+    Facilities. Child segment labels are in dark gray text. There is a clear gap between
+    the inner and outer rings for visual separation. The title "Budget Allocation
+    by Department · donut-nested · plotnine · pyplots.ai" appears at the top. The
+    plot uses a square 1:1 aspect ratio with the donut centered and good canvas utilization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable, inner labels bold white, outer labels dark gray.
+          Title is clear. Slight deduction as outer labels could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, segments well separated with gaps
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Segments clearly visible with good alpha values, small segments are
+          still visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue/yellow/green/red color families with good differentiation,
+          colorblind-friendly palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square layout with centered donut, good canvas utilization (~60%
+          of canvas used)
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for donut charts, axes are hidden appropriately
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed, no legend needed (labels on segments)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring = parent categories (departments), outer ring = child
+          categories (expense types)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has consistent color families per parent, child segments aligned
+          with parent boundaries, gap between rings, labels on larger segments
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible, proportions correct
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Direct labeling used, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "donut-nested · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows 4 parent categories with 2-3 children each, demonstrates hierarchical
+          relationships well. Minor deduction: could show more variation in segment
+          sizes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is an excellent, realistic scenario
+          matching the spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in $K range are realistic for department budgets, though
+          some values are nicely rounded
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses a helper function `create_annular_segment`, which deviates slightly
+          from pure KISS but is necessary for plotnine polygon approach
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no random)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics with geom_polygon, scale_fill_identity,
+          coord_fixed. Creative solution using polygons since plotnine lacks native
+          pie/donut geoms. Minor deduction as it relies on manual polygon construction
+          rather than ggplot-native pie approach.
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/pygal.yaml b/plots/donut-nested/metadata/pygal.yaml
index 4125df21ca..62ef5a09df 100644
--- a/plots/donut-nested/metadata/pygal.yaml
+++ b/plots/donut-nested/metadata/pygal.yaml
@@ -25,3 +25,175 @@ review:
   - Legend text at bottom is somewhat small and could be larger for better readability
   - The PIL compositing approach adds code complexity that could benefit from comments
     explaining the technique
+  image_description: 'The plot displays a nested donut chart with two concentric rings
+    on a white background. The outer ring shows 12 budget subcategories (Salaries,
+    Equipment, Training for Engineering; Advertising, Events, Content for Marketing;
+    Infrastructure, Utilities, Maintenance for Operations; Commissions, Travel, Tools
+    for Sales). The inner ring displays the 4 department totals: Engineering ($700K,
+    blue), Marketing ($500K, yellow), Operations ($340K, green), and Sales ($360K,
+    red). Each department uses a consistent color family - Engineering in blue shades,
+    Marketing in yellow shades, Operations in green shades, and Sales in red/coral
+    shades. Values are displayed in dollar format ($XXK). The title "donut-nested
+    · pygal · pyplots.ai" appears at the top. A legend at the bottom lists all 12
+    subcategories in 4 columns.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and values clearly readable, though legend text at bottom is
+          quite small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Both rings are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of color families; blue/yellow/green/red are distinguishable,
+          but green and blue may look similar to some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well, good proportions, legend placed at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for donut charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend at bottom is functional, no grid needed for donuts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with two concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Inner ring shows departments, outer shows subcategories
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical display with color families, values displayed
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, values sum correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all 12 subcategories with parent prefix
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "donut-nested · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical relationships, varying segment sizes, color families;
+          could have more variance in child distributions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world use case matching the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for budget allocations though all in similar
+          magnitude
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses PIL for compositing which adds complexity, but necessary for
+          nested effect in pygal
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png (correct) and plot.html (bonus)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creative use of pygal's transparent background rendering and PIL
+          compositing to achieve nested effect; uses Style customization well; generates
+          both PNG and HTML
+  verdict: APPROVED
diff --git a/plots/donut-nested/metadata/seaborn.yaml b/plots/donut-nested/metadata/seaborn.yaml
index 6ce5290653..1891fe559b 100644
--- a/plots/donut-nested/metadata/seaborn.yaml
+++ b/plots/donut-nested/metadata/seaborn.yaml
@@ -29,3 +29,175 @@ review:
     at full resolution
   - Category legend only shows blue shades which may cause slight confusion as each
     region has different color shades for the same categories
+  image_description: The plot displays a nested donut chart showing regional budget
+    allocation. The inner ring shows 4 regions (North America $120M in dark blue,
+    Europe $95M in yellow, Asia Pacific $105M in teal, Latin America $50M in gray).
+    The outer ring displays expense categories (Salaries, Marketing, Operations, R&D)
+    with color gradients from dark to light shades matching each region's parent color.
+    The center shows "Total Budget $370M" in bold. Two well-organized legends appear
+    on the left side - one for regions (inner) and one for categories (outer). The
+    title "donut-nested · seaborn · pyplots.ai" is displayed at the top in bold. The
+    plot uses a square 12x12 figure with white background, white edge separations
+    between segments, and labels on the inner ring segments showing region names and
+    values.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, center text at 26pt, labels at 13pt, legend at 16pt
+          - all readable but inner ring labels slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wedges well-sized, clear visual separation with white edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Good color distinction using blue, yellow, teal, gray families; colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas, donut fills majority of space, legends
+          well-positioned but slightly tight on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for donut charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Two legends well-organized with clear titles
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct nested donut chart with inner and outer rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Inner ring: regions (parent), Outer ring: categories (children)
+          - correctly mapped'
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical data, consistent color families per parent, segment
+          alignment, labels, legends
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, segments proportionally sized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Both legends correctly labeled for regions and categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "donut-nested · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 regions with 4 categories each, good variety in values; could
+          show more variation in relative proportions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Regional budget allocation is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in millions ($10M-$45M range) are realistic for corporate
+          budgets
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: No random seed used, but data is deterministic - minor deduction
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.pyplot, numpy, seaborn, Patch)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_theme() and sns.light_palette() for color gradients,
+          but pie charts are matplotlib-native, not seaborn-specific
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/altair.yaml b/plots/dumbbell-basic/metadata/altair.yaml
index 6af7c3d97c..45ce66c5bc 100644
--- a/plots/dumbbell-basic/metadata/altair.yaml
+++ b/plots/dumbbell-basic/metadata/altair.yaml
@@ -27,3 +27,179 @@ review:
   - All improvements are positive; including one department with no change or decline
     would demonstrate fuller range of dumbbell chart capabilities
   - Legend title Period could be more descriptive (e.g., Policy Change)
+  image_description: 'The plot displays a horizontal dumbbell chart showing employee
+    satisfaction scores before and after policy changes for 10 departments. Each department
+    is represented on the y-axis (from IT at the top to Customer Support at the bottom).
+    The x-axis shows "Satisfaction Score" ranging from 45 to 90. Each department has
+    two dots connected by a thin gray line: blue dots represent "before" scores and
+    yellow dots represent "after" scores. The title reads "Employee Satisfaction ·
+    dumbbell-basic · altair · pyplots.ai" at the top. A legend in the top-right corner
+    identifies the "before" (blue) and "after" (yellow) periods. The data is sorted
+    by improvement amount, with smallest improvements at top (IT) and largest at bottom
+    (Customer Support). The grid is subtle with dashed lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text readable; title 28pt, axis labels 18pt, tick labels appropriately
+          sized. Minor: legend could be slightly larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dots are clearly visible at size 350, connecting lines are appropriately
+          subtle at strokeWidth=2
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, chart fills appropriately. Minor: some
+          empty space on right side near legend'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score" is descriptive but lacks units (e.g., "out
+          of 100" or "%")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3 and dashed lines. Legend placement
+          is good but slightly isolated from chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell chart with connected dots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis as spec recommends
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation, distinct colors for start/end, sorted by
+          difference, thin connecting lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within scale domain [45, 90]
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "before" and "after" periods
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format with spec-id, library, and pyplots.ai
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows various improvement magnitudes (small: IT at 5pts, large:
+          Customer Support at 26pts). Could show a negative change for fuller coverage.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction scores before/after policy changes is a realistic
+          scenario directly from spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores in 52-85 range are realistic for such surveys
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no randomness), but an explicit seed comment would
+          be clearer
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative grammar with mark_rule + mark_circle
+          layering, proper encoding types, and tooltips. Could leverage more Altair-specific
+          features like selections or conditional encoding.
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/bokeh.yaml b/plots/dumbbell-basic/metadata/bokeh.yaml
index 23d6d25d62..2d282dfed3 100644
--- a/plots/dumbbell-basic/metadata/bokeh.yaml
+++ b/plots/dumbbell-basic/metadata/bokeh.yaml
@@ -24,3 +24,180 @@ review:
   - Could leverage Bokeh interactive features (hover tooltips) in the HTML output
   - All improvements are positive; including one negative change would demonstrate
     the plot type full capability
+  image_description: The plot displays a horizontal dumbbell chart comparing employee
+    satisfaction scores before and after policy changes across 8 departments. Blue
+    dots (#306998) represent "Before Policy Changes" and yellow dots (#FFD43B) represent
+    "After Policy Changes", connected by subtle gray lines. Departments are listed
+    on the y-axis (Finance, Sales, Operations, Customer Support, Engineering, Marketing,
+    Human Resources, Research & Development - sorted by improvement magnitude). The
+    x-axis shows "Satisfaction Score" ranging from approximately 50 to 90. The title
+    "dumbbell-basic · bokeh · pyplots.ai" is at the top. A legend in the bottom right
+    identifies the two dot colors.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable. Font sizes are
+          appropriate for the 4800x2700 canvas though could be slightly larger for
+          tick labels.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements. Department names are fully visible.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dots are appropriately sized (size=25) and clearly visible. Connecting
+          lines are subtle but visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe (not red-green).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though the plot could be slightly more
+          centered. Minor whitespace imbalance on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Satisfaction Score" and "Department".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha=0.3. However, the legend
+          appears cut off/partially visible at the bottom right corner.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell chart with two dots connected by lines.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis (horizontal orientation as
+          specified).
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for start/end dots, thin connecting lines, sorted
+          by difference.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (45-95) shows all data points with appropriate padding.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Before Policy Changes" and "After Policy
+          Changes".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "dumbbell-basic · bokeh · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in both starting points and improvement magnitudes.
+          All positive changes shown; could include one negative change for full coverage.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction scores before/after policy changes is a realistic,
+          comprehensible scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Satisfaction scores (55-88) are realistic. Could show slightly more
+          variation in starting points.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: 'Data is deterministic (hardcoded values); there is no random seed because
+          none is needed. Minor: sorting makes data deterministic.'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and Bokeh's scatter/line methods. Could leverage
+          hover tools or other Bokeh interactivity features in the HTML output.
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/highcharts.yaml b/plots/dumbbell-basic/metadata/highcharts.yaml
index 3b7c5bf5e8..b6a62b73db 100644
--- a/plots/dumbbell-basic/metadata/highcharts.yaml
+++ b/plots/dumbbell-basic/metadata/highcharts.yaml
@@ -27,3 +27,178 @@ review:
   - Legend text could be more descriptive with colors mentioned
   - Y-axis title present but x-axis title is null - could benefit from clearer axis
     labeling
+  image_description: 'The plot displays a horizontal dumbbell chart showing employee
+    satisfaction scores before and after policy changes across 8 departments. Each
+    department has two dots connected by a thin gray line: a blue dot (#306998) representing
+    the "before" score and a yellow dot (#FFD43B) representing the "after" score.
+    Data labels display the numeric values on each dot. Departments are listed on
+    the y-axis (Customer Support, Operations, Research & Development, Engineering,
+    Sales, Marketing, Human Resources, Finance) and sorted by the magnitude of change
+    in descending order. The title reads "Employee Satisfaction Before/After · dumbbell-basic
+    · highcharts · pyplots.ai" with a subtitle "Satisfaction scores before and after
+    policy changes by department". A legend in the top-right shows "Before → After".
+    The x-axis ranges from approximately 40 to 95 representing satisfaction scores.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 52px, labels at 32px, axis labels at 28px, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, good spacing between categories
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers radius 18 is appropriate, connector width 5 is visible but
+          not overpowering
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow colorblind-safe palette, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but large left margin for category labels could
+          be reduced slightly
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Satisfaction Score" but no units, categories are self-explanatory
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid subtle with dashed style and #e0e0e0 color, legend well positioned
+          but text slightly small relative to other elements'
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell chart type using Highcharts dumbbell module
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis (horizontal orientation as
+          spec recommends)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for start/end, sorting by difference, thin connecting
+          line, all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axis range 40-95 shows all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Before → After" accurately describes the data'
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Uses format but slightly different: "Employee Satisfaction Before/After
+          · dumbbell-basic · highcharts · pyplots.ai" (should be simpler spec-id format
+          but includes context)'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows before/after comparison with varying gaps, all positive changes
+          (no negative changes shown which would add variety)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction scores is a real, comprehensible scenario matching
+          spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores 45-88 are realistic satisfaction percentages
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports used, but json import could be from standard position
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts dumbbell module with inverted chart for horizontal layout,
+          data labels, custom marker styling. Good use but could leverage more interactive features in HTML output
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/letsplot.yaml b/plots/dumbbell-basic/metadata/letsplot.yaml
index 1e483e785f..b81932e97c 100644
--- a/plots/dumbbell-basic/metadata/letsplot.yaml
+++ b/plots/dumbbell-basic/metadata/letsplot.yaml
@@ -25,3 +25,175 @@ review:
   - Legend positioned far from data; could be integrated closer using legend_position
   - All data shows positive improvements only; including some negative or zero changes
     would demonstrate the plot type's full capability
+  image_description: The plot displays a horizontal dumbbell chart showing employee
+    satisfaction scores across 10 departments. Each department is represented on the
+    y-axis, with satisfaction scores on the x-axis ranging from 50 to 95. Yellow dots
+    represent "Before" scores and dark blue dots represent "After" scores, connected
+    by thin gray lines. The chart is sorted by improvement magnitude (largest improvement
+    at top - Customer Support, smallest at bottom - Operations). The title "Employee
+    Satisfaction · dumbbell-basic · letsplot · pyplots.ai" is centered at the top.
+    A legend on the right identifies the Period (Before/After). The grid is subtle
+    with light gray lines, and the overall layout is clean and well-balanced.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, axis text at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all department labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points sized appropriately (size=8), connecting lines subtle (size=1.5)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow (#FFD43B) and blue (#306998) are colorblind-safe, excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though some empty space on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Satisfaction Score" and "Department"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Y-axis grid disabled which is good, but legend could be positioned
+          closer to data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell/connected dot plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis (horizontal orientation as
+          spec prefers)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two distinct colors for start/end, connecting lines, sorted by difference
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis limits [50, 95] show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Before/After periods
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Employee Satisfaction · dumbbell-basic · letsplot
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying improvement magnitudes, all positive changes (could
+          show some negative for more variety)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction before/after policy changes is a realistic,
+          comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Satisfaction scores 55-88 are plausible, though all improvements
+          being positive is slightly idealistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with geom_segment and geom_point, proper
+          theming with theme_minimal and element customization
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/matplotlib.yaml b/plots/dumbbell-basic/metadata/matplotlib.yaml
index 043bee30b0..795b1b70a7 100644
--- a/plots/dumbbell-basic/metadata/matplotlib.yaml
+++ b/plots/dumbbell-basic/metadata/matplotlib.yaml
@@ -15,3 +15,15 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot shows a horizontal dumbbell chart displaying employee
+    satisfaction scores before and after workplace policy changes across 8 departments.
+    Each row represents a department (Sales, Finance, Product, Marketing, Operations,
+    Engineering, Customer Support, HR) with the y-axis showing "Department" and x-axis
+    showing "Satisfaction Score" ranging from 30 to 100. Two colored dots are connected
+    by a thin gray line for each department: blue dots (labeled "Before") on the left
+    and yellow dots (labeled "After") on the right. The data is sorted by the magnitude
+    of change, with HR showing the largest improvement at the bottom. The title follows
+    the correct format "dumbbell-basic · matplotlib · pyplots.ai". The legend is positioned
+    in the lower right corner. All dots are clearly visible with white edge colors,
+    and the grid lines are subtle vertical dashed lines.'
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/plotly.yaml b/plots/dumbbell-basic/metadata/plotly.yaml
index 16b071624c..3de650a61a 100644
--- a/plots/dumbbell-basic/metadata/plotly.yaml
+++ b/plots/dumbbell-basic/metadata/plotly.yaml
@@ -30,3 +30,172 @@ review:
     would demonstrate fuller data range
   - Grid shows only vertical lines; adding horizontal gridlines at low alpha could
     aid reading values
+  image_description: 'The plot displays a horizontal dumbbell chart showing employee
+    satisfaction scores before and after policy changes across 8 departments. Each
+    department is represented on the y-axis with categories sorted by improvement
+    magnitude (Customer Support showing largest improvement at top, Finance smallest
+    at bottom). Blue dots (Python Blue #306998) represent "Before" scores and yellow
+    dots (Python Yellow #FFD43B) represent "After" scores, connected by thin gray
+    lines. The x-axis shows "Satisfaction Score" ranging from 40 to 90. The title
+    "Employee Satisfaction · dumbbell-basic · plotly · pyplots.ai" is centered at
+    the top with a horizontal legend below it showing "Before" and "After" markers.
+    The background is clean white with subtle vertical gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, department names well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size 18 is well-suited for 8 categories, connecting lines
+          visible but subtle
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, left margin accommodates long department
+          names, slight excess space on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score" is descriptive but lacks units (e.g., "/100"
+          or "%")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha 0.1, legend well-placed; however grid is vertical
+          only
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell/connected dot plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis (horizontal orientation as
+          spec prefers)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for start/end dots, sorted by difference, thin connecting
+          lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range [35, 95] shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Before" and "After" with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "{topic} · {spec-id} · {library} · pyplots.ai" format correctly
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied improvements across departments, all positive changes;
+          could show one negative change for full range
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction before/after policy changes is a perfect real-world
+          scenario matching spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores 45-85 are realistic percentages
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → sort → figure → traces → layout
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no explicit seed comment;
+          minor deduction
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported, which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/plotnine.yaml b/plots/dumbbell-basic/metadata/plotnine.yaml
index 659c1aed4a..935b131e6a 100644
--- a/plots/dumbbell-basic/metadata/plotnine.yaml
+++ b/plots/dumbbell-basic/metadata/plotnine.yaml
@@ -26,3 +26,176 @@ review:
     it is a code standard)
   - Horizontal grid lines removed but vertical grid lines kept - for a horizontal
     dumbbell chart, horizontal grid lines would help read values more easily
+  image_description: The plot displays a horizontal dumbbell chart comparing employee
+    satisfaction scores before and after workplace policy changes across 8 departments.
+    Blue dots (#306998) represent "Before" scores and yellow dots (#FFD43B) represent
+    "After" scores, connected by subtle gray lines. Departments are listed on the
+    y-axis (HR, Customer Support, Engineering, Operations, Marketing, Product, Finance,
+    Sales) and satisfaction scores (30-100) on the x-axis. The data is sorted by improvement
+    magnitude with HR and Customer Support showing the largest gains at the bottom.
+    The title correctly displays "dumbbell-basic · plotnine · pyplots.ai". A legend
+    on the right distinguishes "After" and "Before" periods. Text is well-sized and
+    readable, with a clean minimal theme and subtle vertical grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, axis text at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dots sized appropriately (size=6) for 8 categories, connecting lines
+          subtle
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of 16:9 canvas, balanced margins, plot fills space well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Satisfaction Score", "Department") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend well placed, but horizontal grid lines removed leaving only
+          vertical lines which is unusual for horizontal chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell/connected dot plot chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis (horizontal orientation as
+          specified)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two dots with distinct colors, connecting lines, sorted by difference
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis limits (30-100) show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Before" and "After" with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "dumbbell-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varying magnitudes of change (small: Sales 6 pts, large: HR
+          28 pts), all positive improvements, good spread'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction scores before/after policy changes - real,
+          comprehensible scenario matching spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores 40-88 are realistic percentages
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (data is deterministic, but best practice to include
+          seed for consistency)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics (ggplot, aes, geom_segment, geom_point,
+          scale_color_manual, theme_minimal), but no advanced features like faceting
+          or statistical transformations
+  verdict: APPROVED
diff --git a/plots/dumbbell-basic/metadata/seaborn.yaml b/plots/dumbbell-basic/metadata/seaborn.yaml
index 24539e9909..22af366609 100644
--- a/plots/dumbbell-basic/metadata/seaborn.yaml
+++ b/plots/dumbbell-basic/metadata/seaborn.yaml
@@ -25,3 +25,177 @@ review:
     color palettes
   - X-axis label could be more descriptive (e.g., Employee Satisfaction Score instead
     of just Satisfaction Score)
+  image_description: The plot displays a horizontal dumbbell chart comparing employee
+    satisfaction scores before and after policy changes across 10 departments. Blue
+    dots (#306998) represent "Before Policy" values and yellow dots (#FFD43B) with
+    gray edge outlines represent "After Policy" values. Gray connecting lines (#888888)
+    link each pair of dots. Departments are listed on the y-axis (HR, Customer Support,
+    Marketing, IT, Research, Legal, Finance, Operations, Sales, Engineering), sorted
+    by the magnitude of improvement (ascending). The x-axis shows "Satisfaction Score
+    (%)" ranging from approximately 50 to 90. The title "dumbbell-basic · seaborn
+    · pyplots.ai" appears at the top. A legend in the lower right identifies the two
+    data series. A subtle dashed vertical grid with alpha transparency aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size (s=400) appropriate for 10 data points, connecting lines
+          visible but subtle
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and highly
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend placed appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: X-axis has units "(%)" but could be more descriptive (e.g., "Employee
+          Satisfaction Score (%)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3, dashed), legend well placed but could
+          be positioned in upper left to avoid being near data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct dumbbell/connected dot plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis as per spec recommendation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation, distinct colors, sorted by difference, thin
+          connecting lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (45-95) shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Before Policy" and "After Policy"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "dumbbell-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation in both start/end values and differences; all departments
+          show improvement but with varying magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction before/after policy changes is a realistic,
+          comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores 52-88% are realistic; improvements of 13-19 points
+          are plausible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (matplotlib.pyplot, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot which is basic seaborn usage; could have used
+          seaborn's built-in styling or color palettes more distinctively
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/altair.yaml b/plots/ecdf-basic/metadata/altair.yaml
index 02362fcaad..891a096da1 100644
--- a/plots/ecdf-basic/metadata/altair.yaml
+++ b/plots/ecdf-basic/metadata/altair.yaml
@@ -26,3 +26,171 @@ review:
   - Data uses generic normal distribution rather than a real-world scenario context
   - Could leverage more Altair-specific features like interactive selections or layered
     annotations
+  image_description: The plot displays a basic ECDF with a blue step line (#306998)
+    on a white background. The x-axis is labeled "Value" and ranges from approximately
+    10 to 98. The y-axis is labeled "Cumulative Proportion" and ranges from 0.00 to
+    1.00 with increments of 0.05. The title "ecdf-basic · altair · pyplots.ai" appears
+    at the top center. Subtle dashed grid lines aid in reading values. The step function
+    shows the characteristic S-curve shape of a normal distribution, starting near
+    0 on the left, rising through the middle values, and approaching 1.0 on the right.
+    The line uses step-after interpolation, creating the proper staircase pattern
+    expected from an ECDF.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (28pt), axis labels (22pt) and tick labels (18pt)
+          are all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is excellent for visibility, step function clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, high contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills the canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels ("Value", "Cumulative Proportion") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step function visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values on X-axis, cumulative proportion on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function, Y-axis 0-1, grid lines for percentile reading
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis correctly ranges 0-1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (N/A)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "ecdf-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full distribution shape, percentiles readable, normal distribution
+          characteristics visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Uses generic normal distribution data (loc=50, scale=15), plausible
+          but not a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 200 data points ideal for visualization, values in sensible range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → chart → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses Altair's declarative encoding and step-after interpolation,
+          tooltips included, but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/bokeh.yaml b/plots/ecdf-basic/metadata/bokeh.yaml
index adb0e22122..bff5579bd4 100644
--- a/plots/ecdf-basic/metadata/bokeh.yaml
+++ b/plots/ecdf-basic/metadata/bokeh.yaml
@@ -26,3 +26,172 @@ review:
     example
   - Could add hover tooltips to show exact percentile values, which is a key Bokeh
     strength
+  image_description: The plot displays a classic ECDF (Empirical Cumulative Distribution
+    Function) step function rendered in blue (#306998). The x-axis is labeled "Value"
+    ranging from approximately 10 to 90, and the y-axis is labeled "Cumulative Proportion"
+    ranging from 0 to 1. The title "ecdf-basic · bokeh · pyplots.ai" appears in the
+    top-left corner. The step function shows the characteristic S-curve shape of a
+    normal distribution, starting near 0 at low values and approaching 1 at high values.
+    The grid lines are subtle and dashed. The plot uses proper step-wise increments,
+    clearly showing each data point as a horizontal step followed by a vertical rise.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable at full size, though tick labels
+          could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is well-suited for the data density, step function
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor excess whitespace on right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Value", "Cumulative Proportion") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3 and dashed style, no legend needed
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step function implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly sorted on X, cumulative proportion on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function increases by 1/n at each point, Y ranges 0-1, grid
+          lines present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis set to (0, 1.05) showing all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "ecdf-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution ECDF well, but single distribution only
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic normal distribution (mean=50, std=15), plausible but not
+          a specific real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 200 observations is ideal for ECDF, values in reasonable range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and proper Bokeh figure setup, also generates
+          HTML for interactivity, but could leverage more Bokeh-specific features
+          like hover tools
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/highcharts.yaml b/plots/ecdf-basic/metadata/highcharts.yaml
index 5d7bf73d90..e246738313 100644
--- a/plots/ecdf-basic/metadata/highcharts.yaml
+++ b/plots/ecdf-basic/metadata/highcharts.yaml
@@ -26,3 +26,177 @@ review:
   - Axis labels lack units (e.g., Value (units) would be more descriptive)
   - Data uses a simple normal distribution; a slightly more interesting distribution
     would better demonstrate ECDF capabilities
+  image_description: The plot displays an ECDF (Empirical Cumulative Distribution
+    Function) step chart on a white background. The title "ecdf-basic · highcharts
+    · pyplots.ai" is prominently displayed at the top in bold black text. The X-axis
+    is labeled "Value" and ranges from approximately 10 to 86, representing the sample
+    values from a normal distribution. The Y-axis is labeled "Cumulative Proportion"
+    and correctly ranges from 0 to 1. The ECDF is rendered as a blue (#306998 Python
+    Blue) step function line that rises from left to right in a characteristic S-curve
+    shape typical of normally distributed data. Dashed gray grid lines are present
+    on both axes to aid in reading values. A legend showing "ECDF" appears at the
+    bottom center. The chart uses the step="left" configuration, creating proper vertical
+    jumps at each data point.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 72px, axis labels at 48px, tick labels at 36px - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 6px is good for the step function, markers disabled
+          as appropriate for ECDF
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, adequate margins; plot fills canvas well but legend
+          position at bottom center is slightly awkward
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Value" and "Cumulative Proportion" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style and 0.15 alpha, but legend placement
+          at bottom overlaps slightly with x-axis label area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values on X-axis, cumulative proportion on Y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function increases by 1/n at each point, y-axis 0-1, grid lines
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis correctly fixed at 0-1, X-axis shows full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend label "ECDF" is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: ecdf-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows the characteristic S-curve shape of normal distribution; 150
+          points is appropriate; could show more interesting distribution features
+          like asymmetry
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Normal distribution with mean 50, std 15 is a reasonable example
+          but quite generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values between ~10-86 are sensible for the chosen parameters
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no unnecessary functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts step line chart with proper configuration; could
+          leverage more interactive features like tooltips or zoom for HTML version
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/letsplot.yaml b/plots/ecdf-basic/metadata/letsplot.yaml
index d44fa5f5c3..ccdbfb646a 100644
--- a/plots/ecdf-basic/metadata/letsplot.yaml
+++ b/plots/ecdf-basic/metadata/letsplot.yaml
@@ -23,3 +23,148 @@ review:
   weaknesses:
   - Grid lines are too subtle (dashed at 0.5 width) making percentile reading difficult
     - the spec notes grid lines should help with reading specific percentile values
+  image_description: The plot displays an ECDF (Empirical Cumulative Distribution
+    Function) showing web service response times. The chart uses a dark blue (#306998)
+    step line against a white background with light gray dashed grid lines. The X-axis
+    is labeled "Response Time (ms)" ranging from 0 to ~270ms, and the Y-axis shows
+    "Cumulative Proportion" from 0 to 1 with breaks at 0.25, 0.5, 0.75, and 1.0. The
+    title "ecdf-basic · letsplot · pyplots.ai" appears at the top. The step function
+    shows a steep initial rise (reflecting the exponential distribution of fast responses)
+    that gradually flattens, with a secondary steeper section around 180-220ms (reflecting
+    the normal distribution of slower responses).
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text readable at full size, appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: step line perfectly visible with size=2
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: single blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive labels with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: grid lines are barely visible (too subtle)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct ECDF step function
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: step function, 0-1 y-axis, grid present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: shows full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows mixed distribution (exponential + normal)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: web service response times is excellent
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 0-270ms is realistic for web latency
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses geom_step correctly
+        score: 3
+        max: 5
+        passed: true
+        comment: functional but could use stat_ecdf()
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/matplotlib.yaml b/plots/ecdf-basic/metadata/matplotlib.yaml
index ecd70de40b..c9f074085f 100644
--- a/plots/ecdf-basic/metadata/matplotlib.yaml
+++ b/plots/ecdf-basic/metadata/matplotlib.yaml
@@ -25,3 +25,172 @@ review:
     test scores, response times)
   - Does not use matplotlib.pyplot.ecdf (available since 3.8) which would demonstrate
     matplotlib-specific functionality
+  image_description: The plot displays a blue step function (ECDF curve) showing the
+    cumulative distribution of 200 random normal samples. The x-axis is labeled "Value"
+    ranging from approximately 10 to 90, and the y-axis is labeled "Cumulative Proportion"
+    ranging from 0 to 1.0. Three yellow dotted horizontal reference lines mark the
+    quartile positions at 0.25, 0.50, and 0.75. The title correctly reads "ecdf-basic
+    · matplotlib · pyplots.ai" at the top. The line width is appropriately thick (linewidth=3),
+    and the plot uses a subtle gray dashed grid for readability. The overall color
+    scheme is colorblind-safe with blue (#306998) for the main data and yellow (#FFD43B)
+    for the reference lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is appropriate for step function visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Value" is descriptive but lacks units; "Cumulative Proportion"
+          is good'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step function
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values on X-axis, cumulative proportion on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function increases by 1/n, Y-axis 0-1, grid lines present, quartile
+          references added
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis properly 0-1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution ECDF well; quartile references are a nice
+          addition but data itself is somewhat generic
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic random normal samples; plausible but not tied to a specific
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: loc=50, scale=15 produces sensible values in ~10-90 range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Basic matplotlib usage with ax.step(); could use matplotlib's built-in
+          Axes.ecdf method (matplotlib 3.8+) or demonstrate more advanced matplotlib
+          features
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/plotly.yaml b/plots/ecdf-basic/metadata/plotly.yaml
index 49bbe3ed80..ba1886411c 100644
--- a/plots/ecdf-basic/metadata/plotly.yaml
+++ b/plots/ecdf-basic/metadata/plotly.yaml
@@ -23,3 +23,168 @@ review:
   - Data scenario is generic normal distribution - could represent a real-world context
   - Could leverage Plotly interactivity features more (hover tooltips showing exact
     percentiles)
+  image_description: 'The plot displays a basic ECDF (Empirical Cumulative Distribution
+    Function) with a clean, professional appearance. The title "ecdf-basic · plotly
+    · pyplots.ai" appears at the top in dark blue text. The x-axis is labeled "Value"
+    and ranges from approximately 10 to 90, while the y-axis is labeled "Cumulative
+    Proportion" and correctly ranges from 0 to 1. The ECDF is rendered as a blue step
+    function line (color #306998) with appropriate thickness, showing the characteristic
+    S-shaped curve of a normal distribution. The plot uses a white background with
+    subtle gray grid lines. The layout is well-balanced with appropriate margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is excellent for the step function, clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast on white
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units ("Value" could specify context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha 0.1, legend appropriately hidden for single
+          series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step function using shape="hv"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sorted values on X, cumulative proportion on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function increases by 1/n, y-axis 0-1, grid lines present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis explicitly set to [0, 1], X shows full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden (appropriate for single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "ecdf-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full distribution shape from 0 to 1, demonstrates S-curve
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Normal distribution with mean=50, sd=15 is plausible but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 200 observations is ideal, values in sensible range (10-90)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of go.Scatter with shape="hv" for steps, but could leverage
+          Plotly Express ecdf or add hover interactivity
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/plotnine.yaml b/plots/ecdf-basic/metadata/plotnine.yaml
index 35c806bc0d..fd3d85db52 100644
--- a/plots/ecdf-basic/metadata/plotnine.yaml
+++ b/plots/ecdf-basic/metadata/plotnine.yaml
@@ -22,3 +22,175 @@ review:
   - Manual ECDF computation instead of using plotnine native stat_ecdf() which would
     be more idiomatic
   - Axis label Values is generic and lacks units or more descriptive context
+  image_description: The plot displays an ECDF (Empirical Cumulative Distribution
+    Function) as a blue step function on a clean minimal background. The title "ecdf-basic
+    · plotnine · pyplots.ai" is positioned at the top center. The x-axis is labeled
+    "Values" and shows data ranging from approximately 0 to 90, with tick marks at
+    25, 50, and 75. The y-axis is labeled "Cumulative Proportion" with values from
+    0.0 to 1.0 in 0.1 increments. The step function shows a classic S-shaped cumulative
+    distribution, starting near 0 on the left and rising to 1.0 on the right, with
+    the steepest increase around the median value of 50. Light gray grid lines aid
+    in reading values. The blue color (#306998) provides good contrast against the
+    white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes (24pt title, 20pt axis labels, 16pt tick labels)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step line is clearly visible with appropriate width (size=1.5)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units ("Values" could specify the
+          measurement unit)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3) and no legend is needed for a single series;
+          minor deduction for grid being slightly less visible than optimal
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values on x-axis, cumulative proportion on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function increases by 1/n, y-axis 0-1, grid lines present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis correctly shows 0-1 range, x-axis shows full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "ecdf-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution ECDF well, demonstrates S-curve shape;
+          could show more interesting features like different distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Normal distribution centered at 50 with spread of 15 is plausible
+          generic data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 200 observations is good for ECDF visualization; values in reasonable
+          range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses plotnine's ggplot grammar correctly with geom_step, theme_minimal,
+          and proper element_text styling; however, could leverage stat_ecdf() for
+          native ECDF computation instead of manual calculation
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/pygal.yaml b/plots/ecdf-basic/metadata/pygal.yaml
index 4e21df5ab2..152d53b39a 100644
--- a/plots/ecdf-basic/metadata/pygal.yaml
+++ b/plots/ecdf-basic/metadata/pygal.yaml
@@ -15,3 +15,171 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot displays an ECDF (Empirical Cumulative Distribution
+    Function) as a step function on a white background. The line is rendered in a
+    light blue/steel blue color (#306998). The x-axis is labeled "Value" and ranges
+    from approximately -2.4 to 2.0, showing the sorted data values from a normal distribution.
+    The y-axis is labeled "Cumulative Proportion" and correctly ranges from 0 to 1.
+    The title follows the correct format: "ecdf-basic · pygal · pyplots.ai". Horizontal
+    and vertical grid lines are present and subtle. The step function clearly shows
+    the characteristic S-curve shape of a normal distribution''s CDF, starting near
+    0 at the left, passing through 0.5 around x=0, and approaching 1 on the right.'
+  criteria_checklist:
+    visual_quality:
+      score: 32
+      max: 35
+      items:
+      - id: VQ-01
+        name: Meaningful axis labels
+        score: 7
+        max: 7
+        passed: true
+        comment: '"Value" and "Cumulative Proportion" are clear and descriptive'
+      - id: VQ-02
+        name: No overlapping text
+        score: 6
+        max: 6
+        passed: true
+        comment: All text is readable, no overlaps
+      - id: VQ-03
+        name: Color choice
+        score: 5
+        max: 5
+        passed: true
+        comment: Python blue (#306998) used appropriately
+      - id: VQ-04
+        name: Clear data elements
+        score: 4
+        max: 5
+        passed: true
+        comment: Line is visible but could be slightly thicker for 4800px canvas
+      - id: VQ-05
+        name: Layout balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no cut-off content
+      - id: VQ-06
+        name: Grid subtlety
+        score: 3
+        max: 3
+        passed: true
+        comment: Grid lines are subtle and help read percentile values
+      - id: VQ-08
+        name: Image size
+        score: 2
+        max: 2
+        passed: true
+        comment: 4800x2700 px correct
+    spec_compliance:
+      score: 33
+      max: 35
+      items:
+      - id: SC-01
+        name: Correct plot type
+        score: 10
+        max: 10
+        passed: true
+        comment: ECDF step function correctly implemented
+      - id: SC-02
+        name: Data mapped correctly
+        score: 7
+        max: 7
+        passed: true
+        comment: Sorted values on X, cumulative proportion on Y
+      - id: SC-03
+        name: Required features present
+        score: 7
+        max: 7
+        passed: true
+        comment: Step function increments by 1/n, shows full distribution
+      - id: SC-04
+        name: Data range appropriate
+        score: 4
+        max: 4
+        passed: true
+        comment: Y-axis 0-1, X-axis shows all data points
+      - id: SC-05
+        name: Legend accuracy
+        score: 2
+        max: 4
+        passed: true
+        comment: Legend hidden (appropriate for single series, but spec doesn't require
+          hiding)
+      - id: SC-06
+        name: Title format correct
+        score: 3
+        max: 3
+        passed: true
+        comment: '"ecdf-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 14
+      max: 15
+      items:
+      - id: DQ-01
+        name: Feature coverage
+        score: 5
+        max: 6
+        passed: true
+        comment: Shows ECDF well, demonstrates normal distribution shape. Could show
+          more variation in step sizes to highlight the 1/n increment concept.
+      - id: DQ-02
+        name: Realistic context
+        score: 5
+        max: 5
+        passed: true
+        comment: Random samples from normal distribution is a standard, realistic
+          example
+      - id: DQ-03
+        name: Appropriate scale
+        score: 4
+        max: 4
+        passed: true
+        comment: 100 observations is ideal for visualization, values are sensible
+    code_quality:
+      score: 14
+      max: 15
+      items:
+      - id: CQ-01
+        name: KISS structure
+        score: 4
+        max: 4
+        passed: true
+        comment: 'Clean sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducible
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Library idioms
+        score: 3
+        max: 3
+        passed: true
+        comment: Proper pygal XY chart usage with custom Style
+      - id: CQ-04
+        name: Clean imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style)
+      - id: CQ-05
+        name: Helpful comments
+        score: 1
+        max: 1
+        passed: true
+        comment: Comments explain step function construction
+      - id: CQ-06
+        name: No deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API used
+      - id: CQ-07
+        name: Output correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+  verdict: APPROVED
diff --git a/plots/ecdf-basic/metadata/seaborn.yaml b/plots/ecdf-basic/metadata/seaborn.yaml
index d77790e61a..9bc10a0a83 100644
--- a/plots/ecdf-basic/metadata/seaborn.yaml
+++ b/plots/ecdf-basic/metadata/seaborn.yaml
@@ -28,3 +28,170 @@ review:
     to showcase library capabilities
   - Data context is generic (random normal samples) rather than a compelling real-world
     scenario
+  image_description: The plot displays a clear ECDF (Empirical Cumulative Distribution
+    Function) step function with a steel blue line (#306998) against a white background.
+    The x-axis is labeled "Value" and ranges from approximately 10 to 90, while the
+    y-axis is labeled "Cumulative Proportion" and correctly spans from 0 to 1.0. The
+    title "ecdf-basic · seaborn · pyplots.ai" is prominently displayed at the top.
+    The step function exhibits the characteristic S-curve shape expected from normally
+    distributed data (mean=50, std=15), with the steepest increase around the center
+    (values 40-60) and flattening tails at both ends. Subtle dashed grid lines (alpha=0.3)
+    aid in reading percentile values. The line width is appropriately thick (linewidth=3)
+    for visibility at high resolution.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is perfect for this data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Value" and "Cumulative Proportion" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ECDF step function
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values on X, cumulative proportion on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function, 0-1 y-axis range, grid for percentile reading
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis correctly set to 0-1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: ecdf-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution ECDF well, but could benefit from showing
+          specific percentile markers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Random samples from normal distribution is a classic statistical
+          example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values centered at 50 with std=15 are sensible, though generic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: sns.ecdfplot is current API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.ecdfplot which is seaborn-specific, but no additional seaborn
+          features like stat parameter variations or complementary=True option
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/altair.yaml b/plots/elbow-curve/metadata/altair.yaml
index c7e6634522..d92127d65c 100644
--- a/plots/elbow-curve/metadata/altair.yaml
+++ b/plots/elbow-curve/metadata/altair.yaml
@@ -22,3 +22,174 @@ review:
   weaknesses:
   - No legend present to explain what the line/points represent
   - Y-axis label could note that inertia is unitless or define the metric more explicitly
+  image_description: The plot displays an elbow curve for K-means clustering with
+    a blue line connecting data points from k=1 to k=11. The x-axis shows "Number
+    of Clusters (k)" and the y-axis shows "Inertia (Within-Cluster Sum of Squares)"
+    ranging from 0 to 6,000. Blue circular markers (filled) are placed at each k value.
+    The elbow point at k=4 is highlighted with a larger yellow/gold marker with a
+    blue border, and annotated with "Optimal k = 4" text in blue. The title reads
+    "elbow-curve · altair · pyplots.ai" at the top. The curve shows the characteristic
+    sharp initial decline followed by diminishing returns - a classic elbow shape.
+    The background is white with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 300 appropriate for 11 data points, line width 4 is
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and high
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but missing units (inertia is unitless but could
+          note this)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.3 is subtle and non-distracting
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart with markers for elbow curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: markers at each k, smooth line, elbow
+          annotation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate y-axis padding (1.1x)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (not strictly needed but could show line meaning)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "elbow-curve · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows sharp initial drop, clear elbow region, and diminishing returns
+          tail
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulates realistic K-means inertia decay with exponential function
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Inertia values 150-5200 are realistic for K-means clustering
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Declarative layered composition with mark_line + mark_point + mark_text,
+          uses Altair's encoding system, tooltips for interactivity, proper Title
+          configuration
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/bokeh.yaml b/plots/elbow-curve/metadata/bokeh.yaml
index 04567516d6..0561b6a63b 100644
--- a/plots/elbow-curve/metadata/bokeh.yaml
+++ b/plots/elbow-curve/metadata/bokeh.yaml
@@ -25,3 +25,179 @@ review:
     with significant empty space
   - Does not utilize Bokeh-specific interactive features like HoverTool to show exact
     values on hover
+  image_description: The plot displays an elbow curve for K-means clustering with
+    a light gray background (#fafafa). The title "elbow-curve · bokeh · pyplots.ai"
+    appears centered at the top in dark gray text. The X-axis is labeled "Number of
+    Clusters (k)" ranging from 1 to 10, and the Y-axis is labeled "Inertia (Within-Cluster
+    Sum of Squares)" ranging from approximately 0 to 5000. The curve shows blue (#306998)
+    circular markers connected by a blue line, starting high at k=1 (~5000) and dropping
+    sharply to k=4, then leveling off. The elbow point at k=4 is highlighted with
+    a larger yellow (#FFD43B) marker. A dashed yellow vertical line marks the elbow
+    position. An "Elbow Point" text annotation appears near k=4. The legend in the
+    top-right corner shows "Optimal k = 4" with a yellow marker. Grid lines are subtle
+    and dashed.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (size=20 for data, size=30 for highlight)
+          for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but plot area could be slightly larger; some empty
+          space on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Number of Clusters (k)" and "Inertia
+          (Within-Cluster Sum of Squares)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is positioned far in the top-right corner, isolated from the
+          plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve/line plot with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Line connecting points, markers at each k, elbow point annotated/highlighted
+          as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Optimal k = 4"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "elbow-curve · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows classic elbow curve pattern with sharp decrease before elbow
+          and diminishing returns after
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: K-means inertia values are realistic and the elbow pattern is clearly
+          visible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Inertia values (0-5000) and k range (1-10) are appropriate for clustering
+          analysis
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implementation uses basic Bokeh features but does not leverage distinctive
+          Bokeh strengths like HoverTool for interactivity, which would be natural
+          for this library
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/highcharts.yaml b/plots/elbow-curve/metadata/highcharts.yaml
index 0d1439b1eb..16688cbbf9 100644
--- a/plots/elbow-curve/metadata/highcharts.yaml
+++ b/plots/elbow-curve/metadata/highcharts.yaml
@@ -25,3 +25,178 @@ review:
     area
   - Could leverage more Highcharts-specific interactive features for the interactive
     HTML output
+  image_description: The plot displays an elbow curve for K-means clustering with
+    a white background. The X-axis shows "Number of Clusters (k)" ranging from 1 to
+    12, and the Y-axis shows "Inertia (Within-cluster Sum of Squares)" ranging from
+    0 to 16,000. A blue line (#306998) connects circular blue markers at each k value,
+    showing the characteristic elbow curve shape with inertia decreasing rapidly from
+    ~15,000 at k=1 to ~4,500 at k=4, then leveling off. The elbow point at k=4 is
+    highlighted with a yellow diamond marker (#FFD43B) with blue outline, accompanied
+    by a yellow annotation label reading "Elbow Point (k=4)". A legend in the upper
+    right shows "Inertia" (blue circle) and "Optimal k = 4" (yellow diamond). The
+    title "elbow-curve · highcharts · pyplots.ai" is prominently displayed at the
+    top. Grid lines are subtle dashed gray lines. The layout is clean and professional.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size with appropriate font sizes (48px title, 42px axis titles, 32px labels)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are well-sized (radius 14 for line, 24 for optimal point),
+          line width of 5 is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (blue #306998 and yellow #FFD43B),
+          no red-green conflicts'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate margins, but legend is slightly
+          far from the plot area in the upper right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with proper context: "Number of
+          Clusters (k)" and "Inertia (Within-cluster Sum of Squares)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle with dashed style which is good, but legend position
+          could be integrated better
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve line chart with markers showing discrete k values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows k values (1-12), Y-axis correctly shows inertia
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: markers at each point, smooth connecting
+          line, annotated elbow point'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points, y-axis starts from 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Inertia" and "Optimal k = 4"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "elbow-curve · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows the characteristic elbow curve shape with clear diminishing
+          returns, elbow point is well-defined at k=4
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated K-means inertia data is plausible, values range from ~15,000
+          to ~500 which is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Inertia values are sensible for a clustering scenario
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: false
+        comment: Saves as plot.png correctly (already counted)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts annotations module for elbow point label, scatter
+          series for optimal marker, but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/letsplot.yaml b/plots/elbow-curve/metadata/letsplot.yaml
index e04f968daa..6a232e773d 100644
--- a/plots/elbow-curve/metadata/letsplot.yaml
+++ b/plots/elbow-curve/metadata/letsplot.yaml
@@ -25,3 +25,174 @@ review:
   - Could use lets-plot interactive features like tooltips showing exact values on
     hover
   - Grid lines could be slightly more subtle (alpha currently at 0.5, could be 0.3)
+  image_description: The plot displays an elbow curve for K-means clustering. It shows
+    a line chart with blue circular markers at each data point, connected by a smooth
+    blue line. The x-axis is labeled "Number of Clusters (k)" ranging from 1 to 10,
+    and the y-axis is labeled "Inertia (Within-Cluster Sum of Squares)" ranging from
+    approximately 1,000 to 13,000. A vertical dashed yellow line marks k=4 as the
+    optimal elbow point, with a yellow diamond marker highlighting that specific data
+    point. The title "elbow-curve · letsplot · pyplots.ai" appears at the top in bold.
+    The curve clearly demonstrates the characteristic elbow shape with sharp decrease
+    from k=1 to k=4, then diminishing returns afterward. The background is clean with
+    subtle gray grid lines and a minimal theme.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels are clearly readable, tick labels
+          are appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized for data density, elbow point highlighted
+          with contrasting diamond marker
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with context ("Within-Cluster Sum of Squares")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate but no legend present; while no legend is strictly
+          needed, the elbow point annotation could benefit from a small legend explaining
+          the yellow marker
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve / line chart with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has markers at each point, connecting line, elbow point highlighted,
+          vertical indicator line
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All k values (1-10) and inertia values fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "elbow-curve · letsplot · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows elbow pattern well but could include more dramatic diminishing
+          returns after k=4
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer segmentation scenario with plausible inertia values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic though slightly simplified for demonstration
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and lets_plot imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar with ggsize() and theme customization, but could
+          leverage more lets-plot specific features like tooltips or annotations
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/matplotlib.yaml b/plots/elbow-curve/metadata/matplotlib.yaml
index 38fec75318..0dc6a69add 100644
--- a/plots/elbow-curve/metadata/matplotlib.yaml
+++ b/plots/elbow-curve/metadata/matplotlib.yaml
@@ -24,3 +24,180 @@ review:
   - Legend is defined but not displayed (ax.legend() call is missing), though the
     label parameter and axvspan label are set
   - Y-axis label could include units or clarify unitless nature of inertia
+  image_description: The plot displays an elbow curve for K-Means clustering on a
+    16:9 landscape canvas with a white background. The X-axis shows "Number of Clusters
+    (k)" ranging from 1 to 10, and the Y-axis shows "Inertia (Within-Cluster Sum of
+    Squares)" ranging from approximately 500 to 5000. A dark blue line (#306998) connects
+    10 data points, each marked with yellow (#FFD43B) circular markers with blue borders.
+    The curve shows the characteristic elbow shape with a sharp decline from k=1 to
+    k=4, then a gradual decrease from k=4 onwards. The elbow point at k=4 is highlighted
+    with a larger yellow marker and annotated with "Elbow Point (k=4)" in bold blue
+    text with an arrow pointing to it. A subtle light blue shaded region spans from
+    k=4 to k=10 indicating the "diminishing returns" zone. The title "elbow-curve
+    · matplotlib · pyplots.ai" appears at the top. A subtle dashed grid is visible
+    in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, annotation placed well away from data
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (s=400 for elbow, markersize=12 for line) are appropriately
+          sized for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but Y-axis lacks specific units (though for
+          inertia, unitless is acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend is shown despite having
+          a label parameter and a labeled axvspan
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve line plot with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: markers at data points, smooth connecting
+          line, elbow point annotation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data clearly with appropriate range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (legend not required for this single-series plot)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "elbow-curve · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows elbow curve shape well with clear elbow point; slight deduction
+          as the curve could show more dramatic variation in the "tail" to better
+          illustrate diminishing returns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Inertia values (5000 down to ~700) are realistic for K-means clustering
+          scenarios
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are sensible, though the starting inertia of 5000 is somewhat
+          arbitrary
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses matplotlib features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of annotations with arrowprops, axvspan for shading, marker
+          customization. Could use more distinctive features like spines customization
+          or secondary annotations.
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/plotly.yaml b/plots/elbow-curve/metadata/plotly.yaml
index 65566746de..6e5e290f2b 100644
--- a/plots/elbow-curve/metadata/plotly.yaml
+++ b/plots/elbow-curve/metadata/plotly.yaml
@@ -24,3 +24,179 @@ review:
   - Grid visibility is too subtle (alpha 0.1) making it hard to trace values
   - Interactive HTML is generated but not being evaluated; could leverage more Plotly-specific
     interactive features
+  image_description: The plot displays an elbow curve for K-means clustering with
+    a clean white background. The x-axis shows "Number of Clusters (k)" ranging from
+    1 to 12, and the y-axis shows "Within-Cluster Sum of Squares (Inertia)" ranging
+    from 0 to about 5000. The main curve is rendered in blue (#306998) with connected
+    lines and circular markers at each k value. The curve shows the characteristic
+    exponential decay pattern of inertia decreasing as k increases. At k=4, there
+    is a yellow/gold highlighted marker indicating the elbow point, with an annotation
+    box pointing to it that reads "Elbow Point k = 4" in blue text with a border.
+    A legend in the upper right shows "Inertia" (blue line with marker) and "Elbow
+    (k=4)" (yellow circle). The title "elbow-curve · plotly · pyplots.ai" is centered
+    at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at 16px with 4px lines, perfectly visible for 12 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins but slightly more whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Number of Clusters (k)" and "Within-Cluster
+          Sum of Squares (Inertia)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is too subtle at alpha 0.1, and legend placement in upper right
+          overlaps with where data would be if continued; legend could be positioned better
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve (line + scatter) chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia - correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: markers at data points, smooth connecting
+          line, elbow point annotation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 k values visible, y-axis shows full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Inertia" and "Elbow (k=4)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "elbow-curve · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows elbow shape clearly with k=4 as optimal point, but the elbow
+          is relatively subtle
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses scikit-learn-style inertia values with plausible exponential
+          decay pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Inertia values (100-5000) and k range (1-12) are realistic for clustering
+          analysis
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png at correct resolution (4800x2700 via scale=3)
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: The code generates interactive HTML via write_html(), but this is
+          a standard Plotly feature; hover templates are nice but basic. Could use
+          more advanced Plotly features like range sliders, buttons, or secondary
+          traces for rate-of-change visualization.
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/plotnine.yaml b/plots/elbow-curve/metadata/plotnine.yaml
index 17fef19033..61b1c0a975 100644
--- a/plots/elbow-curve/metadata/plotnine.yaml
+++ b/plots/elbow-curve/metadata/plotnine.yaml
@@ -26,3 +26,172 @@ review:
     ..., 10) which would be more appropriate for discrete k values
   - The realistic context is somewhat generic - could use a more specific domain scenario
     in comments
+  image_description: The plot displays an elbow curve for K-means clustering on a
+    clean white background. A blue line (#306998) connects 10 data points from k=1
+    to k=10, with circular markers at each discrete k value. The curve starts at approximately
+    1000 inertia for k=1 and decreases sharply until k=4, then flattens out to around
+    60-70 for k=10. A yellow/gold dashed vertical line marks the optimal k=4 position,
+    with a yellow "Optimal k = 4" annotation placed to the right of this line. The
+    title "elbow-curve · plotnine · pyplots.ai" appears at the top in bold black text.
+    The x-axis is labeled "Number of Clusters (k)" and the y-axis is labeled "Inertia
+    (Within-Cluster Sum of Squares)". A subtle gray grid is visible in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (24pt), axis titles are clear (20pt), tick
+          labels readable (16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (size=2) and point size (size=5) are well-suited for 10
+          data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins, plot fills appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis label is descriptive but lacks units (inertia is unitless,
+          but could note "arbitrary units")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: 'Grid is subtle (alpha=0.3) and no legend is needed; minor: grid
+          could be slightly more visible'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has markers, connecting line, and annotated optimal k as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 10 k values visible, y-axis shows full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "elbow-curve · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear elbow shape with sharp decrease then plateau
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Simulated K-means inertia values are plausible but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Inertia values (1000 down to ~60) are realistic for clustering
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of plotnine grammar: ggplot + aes + geom_line + geom_point
+          + geom_vline + annotate + theme_minimal + theme customization. Could have
+          used scale_x_continuous for integer breaks.'
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/pygal.yaml b/plots/elbow-curve/metadata/pygal.yaml
index 3c967c025f..30f77f808c 100644
--- a/plots/elbow-curve/metadata/pygal.yaml
+++ b/plots/elbow-curve/metadata/pygal.yaml
@@ -23,3 +23,181 @@ review:
   - Legend positioned quite far from the plot area at the bottom
   - Could use pygal built-in annotation or tooltip features to enhance the elbow point
     indication
+  image_description: The plot displays an elbow curve for K-means clustering with
+    a blue line connecting data points from k=1 to k=10. The y-axis shows "Inertia
+    (Within-cluster Sum of Squares)" ranging from 0 to approximately 4400, while the
+    x-axis shows "Number of Clusters (k)" with integer values 1-10. Blue circular
+    markers appear at each k value, connected by a smooth blue line. A larger yellow/gold
+    marker highlights the optimal elbow point at k=4. The title "elbow-curve · pygal
+    · pyplots.ai" appears at the top. A legend at the bottom shows "Inertia" (blue)
+    and "Optimal k" (yellow). The background is clean white with subtle horizontal
+    grid lines. The characteristic elbow shape is clearly visible with a steep decline
+    from k=1 (4200) to k=4 (680), then flattening out.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size. Font sizes are appropriate.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized and visible. The elbow point marker is distinctively
+          larger. Line connecting points is clear.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow/gold colors are colorblind-safe and provide good
+          contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins. Plot area fills approximately
+          60% of canvas.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with context: "Number of Clusters
+          (k)" and "Inertia (Within-cluster Sum of Squares)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Horizontal grid lines are subtle. Legend at bottom is clear but placed
+          quite far from the plot area.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart with markers showing elbow curve.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows k values, Y-axis shows inertia.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has markers at each data point, smooth connecting line, and highlighted
+          optimal k value as specified.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, X-axis shows all k values 1-10, proper range
+          set.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Inertia" and "Optimal k" series.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "elbow-curve · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Data clearly demonstrates the elbow pattern with steep initial decline
+          and gradual flattening. The elbow point at k=4 is evident.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Customer segmentation scenario is plausible. Inertia values follow
+          realistic K-means behavior.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Inertia values (365-4200) are realistic for clustering analysis.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → style → chart → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded values); no random seed is needed
+          since no randomness is used.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported, both are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart, custom Style, legend positioning, dots_size
+          customization. Could have used more pygal-specific features like tooltips
+          or value formatting.
+  verdict: APPROVED
diff --git a/plots/elbow-curve/metadata/seaborn.yaml b/plots/elbow-curve/metadata/seaborn.yaml
index c3eddf85c4..b02174f4f8 100644
--- a/plots/elbow-curve/metadata/seaborn.yaml
+++ b/plots/elbow-curve/metadata/seaborn.yaml
@@ -24,3 +24,172 @@ review:
     more distinctively
   - Y-axis label could include units in parentheses format even if unitless (e.g.,
     Inertia (WCSS))
+  image_description: 'The plot displays an elbow curve for K-means clustering with
+    a white grid background. A blue line (color #306998) connects 10 data points from
+    k=1 to k=10, each marked with yellow/gold circular markers with blue borders.
+    The curve shows the characteristic elbow shape: high inertia (~2800) at k=1, sharp
+    decrease through k=2 (~1650), k=3 (~950), reaching the elbow at k=4 (~520), then
+    diminishing returns through k=10 (~305). The elbow point at k=4 is annotated with
+    bold blue text "Elbow Point (k=4)" and an arrow. The title "elbow-curve · seaborn
+    · pyplots.ai" appears at the top. X-axis labeled "Number of Clusters (k)", Y-axis
+    labeled "Inertia (Within-Cluster Sum of Squares)". Layout is well-balanced with
+    proper margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are large (size 15) and clearly visible, line width 3 is
+          appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, annotation positioned cleanly
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but no units (inertia is unitless, but k could
+          be clearer)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid alpha is 0.3 which is good, but no legend needed here
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct elbow curve/line plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=k values, Y=inertia correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: markers at each point, smooth connecting
+          line, annotated elbow point'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately, k from 1-10 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "elbow-curve · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear elbow pattern with sharp decrease then diminishing returns.
+          Could show multiple potential elbows for richer example.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic K-means inertia values showing typical clustering scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 300-2800 are realistic for typical clustering problems
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot which is basic; seaborn's strength is statistical
+          visualizations. Could use sns.regplot or statistical features.
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/altair.yaml b/plots/errorbar-asymmetric/metadata/altair.yaml
index 86bfe67ef9..8f629ce509 100644
--- a/plots/errorbar-asymmetric/metadata/altair.yaml
+++ b/plots/errorbar-asymmetric/metadata/altair.yaml
@@ -25,3 +25,174 @@ review:
   - Data scenario could be more specific (e.g., Manufacturing Batch Quality) rather
     than generic Product A-F
   - Could leverage Altair selection/highlight features for enhanced interactivity
+  image_description: The plot displays 6 products (Product A through F) on the x-axis
+    with quality scores on the y-axis (range 50-104). Each data point is shown as
+    a filled yellow circle with a blue border. Blue vertical error bars extend asymmetrically
+    above and below each central point, with horizontal caps (tick marks) at both
+    ends clearly marking the error bounds. The title "errorbar-asymmetric · altair
+    · pyplots.ai" is centered at the top. The y-axis label reads "Quality Score (10th–90th
+    Percentile)". An italicized annotation in the lower-right corner states "Error
+    bars show 10th–90th percentile range". The grid is subtle (low opacity), and the
+    overall color scheme uses Python blue (#306998) for bars and Python yellow (#FFD43B)
+    for points.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title ~28pt, axis labels ~22pt, tick labels ~18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points size=300 with filled markers, error bars strokeWidth=3, caps
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with context "(10th–90th Percentile)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle (alpha 0.3), but no legend present (annotation
+          serves as explanation)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=categories, Y=central values, error bars correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has caps on error bars, annotation explaining bounds, asymmetric
+          errors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis domain [50, 105] shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotation clearly explains what error bars represent
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variety: some products have larger upper errors, others larger
+          lower errors, demonstrating true asymmetry'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Product quality scores is plausible but generic (not a specific real-world
+          scenario)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Quality scores 68-92 with reasonable error ranges are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → layers → combine → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered composition (mark_rule + mark_tick + mark_point),
+          tooltips for interactivity, but could have used selection for highlighting
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/bokeh.yaml b/plots/errorbar-asymmetric/metadata/bokeh.yaml
index 1c194d87c6..f8d47cdd97 100644
--- a/plots/errorbar-asymmetric/metadata/bokeh.yaml
+++ b/plots/errorbar-asymmetric/metadata/bokeh.yaml
@@ -24,3 +24,177 @@ review:
   - Legend text appears smaller than optimal relative to the large canvas size
   - Manual cap implementation could leverage Bokeh's built-in Whisker head customization
     more elegantly
+  image_description: The plot displays quarterly revenue forecasts from Q1 2024 to
+    Q2 2025 with asymmetric error bars. Yellow/gold circular markers represent median
+    forecast values, connected by blue vertical error bars with horizontal caps at
+    both ends. The error bars clearly demonstrate asymmetry - with larger downside
+    uncertainty (extending further below) than upside potential. The title "errorbar-asymmetric
+    · bokeh · pyplots.ai" appears at the top. The x-axis shows quarters, y-axis shows
+    "Revenue Forecast ($ millions)" ranging from ~10 to ~21. A legend in the top-left
+    explains the markers as "Median forecast (10th-90th percentile)". The background
+    is white with subtle dashed grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable; title, axis labels, and tick labels
+          are appropriately sized for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and error bars are well-sized and visible; caps could be
+          slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with appropriate margins and balanced layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Revenue Forecast ($ millions)",
+          "Quarter"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend text is noticeably smaller than other text elements; appears
+          undersized relative to the canvas
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (quarters) and Y (revenue) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: asymmetric error bars, visible caps,
+          legend explaining bounds'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points and error bars fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes "10th-90th percentile"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-asymmetric · bokeh · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear asymmetry in all error bars with varying magnitudes across
+          quarters
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial forecasting with asymmetric risk is a realistic, neutral
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values ($10-21M) and error magnitudes are realistic for business
+          forecasts
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses Whisker model correctly but implementation is fairly basic;
+          manual cap drawing instead of using Whisker's built-in head properties more
+          creatively
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/highcharts.yaml b/plots/errorbar-asymmetric/metadata/highcharts.yaml
index 7d84a84ebd..dc9d24477b 100644
--- a/plots/errorbar-asymmetric/metadata/highcharts.yaml
+++ b/plots/errorbar-asymmetric/metadata/highcharts.yaml
@@ -21,3 +21,185 @@ review:
   - Subtitle provides context for the confidence interval interpretation
   weaknesses:
   - Library version shows as "unknown" in the header comment (minor metadata issue)
+  image_description: The plot displays a quarterly sales forecast with asymmetric
+    error bars on a white background. Six data points (Q1 2024 through Q2 2025) are
+    shown as yellow diamond markers with blue outlines, positioned along the x-axis
+    by quarter. Each point has a blue vertical error bar extending asymmetrically
+    above and below, with visible horizontal caps (whiskers) at both ends. The title
+    "errorbar-asymmetric · highcharts · pyplots.ai" appears at top center in bold,
+    with a subtitle explaining the 10th-90th percentile confidence intervals. The
+    y-axis shows "Sales Forecast (Million USD)" ranging from ~10.5 to 24, and the
+    x-axis shows "Quarter". A legend in the upper-right corner identifies "Point Estimate"
+    (diamond) and "10th-90th Percentile Range" (blue line). The layout is clean with
+    good use of canvas space and subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable.
+          Font sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Diamond markers are well-sized and visible. Error bars have good
+          stem width and visible caps. Markers could be slightly larger for optimal
+          visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe; no red-green
+          combinations.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with appropriate margins. Plot area uses
+          space well, though legend positioning slightly compact.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "(Million USD)"; X-axis has
+          "Quarter".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. Legend is functional but could have
+          slightly better symbol representation.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot using Highcharts native errorbar
+          series type.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (quarters) and Y (sales values) correctly assigned with proper
+          error bounds.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: asymmetric errors, visible caps, legend
+          explaining bounds, appropriate data points (6).'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis auto-scales appropriately to show all data and error ranges.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Point Estimate" and "10th-90th Percentile
+          Range".
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title follows format but header in file shows "unknown" for library
+          version.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear asymmetry with larger upside potential vs downside risk.
+          Good variety in error magnitudes across quarters.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial forecasting scenario is realistic and neutral. Sales figures
+          in millions USD are plausible.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for quarterly sales (12-18M range). Error bounds
+          are proportionally sensible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → series →
+          export.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used; no unnecessary imports.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Note: Header shows "highcharts unknown" for version.'
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses native Highcharts errorbar series type with whiskerLength and
+          stemWidth configuration. Good use of highcharts-more.js module.
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/letsplot.yaml b/plots/errorbar-asymmetric/metadata/letsplot.yaml
index 733e525588..d86ffedc1c 100644
--- a/plots/errorbar-asymmetric/metadata/letsplot.yaml
+++ b/plots/errorbar-asymmetric/metadata/letsplot.yaml
@@ -24,3 +24,181 @@ review:
     hover
   - Grid/Legend score reduced because there is no formal legend element (caption serves
     the purpose but is not a legend)
+  image_description: The plot displays a quarterly sales performance visualization
+    with asymmetric error bars. Four data points (Q1-Q4) are shown as blue circular
+    markers on a clean white background with subtle gray gridlines. Each quarter has
+    vertical error bars with horizontal caps at both ends - the asymmetry is clearly
+    visible where upper and lower bars have different lengths. Q4 shows the largest
+    upside potential (error bar extending to ~127), while Q3 shows the largest downside
+    risk (extending down to ~66). The title "errorbar-asymmetric · letsplot · pyplots.ai"
+    appears at the top in bold. The y-axis is labeled "Sales (thousands USD)" and
+    x-axis shows "Quarter". A caption at the bottom right explains "Error bars represent
+    10th-90th percentile forecast range". The color scheme uses a consistent blue
+    (#306998) for all elements.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~24pt), axis labels clearly readable (~20pt),
+          tick labels appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=6), error bars have good thickness (size=1.5),
+          caps clearly visible (width=0.3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues, good contrast against
+          white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, minor excess whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "Sales (thousands USD)", X-axis appropriately
+          labeled "Quarter"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend present - the caption serves
+          as explanation but a formal legend would be better
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot with points and error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=quarters (categorical), Y=sales values, error bounds correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has visible caps on error bars, includes caption explaining what
+          bounds represent (10th-90th percentile)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [50, 140] shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Caption accurately describes the asymmetric bounds
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-asymmetric · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear asymmetry with different upper/lower errors across quarters;
+          Q2 has large upside, Q3 has large downside - demonstrates the key feature
+          well but could show more extreme asymmetry
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales forecast with percentile ranges is a realistic business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sales values (78-105k) are plausible; error ranges are reasonable
+          but Q4's upper bound of 127k seems high
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) even though data is deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot2-style grammar with geom_errorbar and geom_point,
+          theme customization, but could leverage more lets-plot specific features
+          like tooltips for the interactive HTML version
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/matplotlib.yaml b/plots/errorbar-asymmetric/metadata/matplotlib.yaml
index 32859ee4e0..805676b166 100644
--- a/plots/errorbar-asymmetric/metadata/matplotlib.yaml
+++ b/plots/errorbar-asymmetric/metadata/matplotlib.yaml
@@ -23,3 +23,174 @@ review:
   - Marker size (15) could be slightly larger for better visibility at full resolution
   - Legend could better explain what asymmetric bounds represent (the fill region
     is not explained)
+  image_description: The plot displays a clean asymmetric error bar visualization
+    with 8 data points representing monthly sales data (Jan-Aug). Each point is shown
+    as a solid blue (#306998) circle marker with thick vertical error bars extending
+    asymmetrically above and below. The error bars have visible horizontal caps at
+    both ends. A subtle light blue filled region connects all error bounds, showing
+    the confidence interval range across months. The title "errorbar-asymmetric ·
+    matplotlib · pyplots.ai" is prominently displayed at the top. The x-axis shows
+    months, y-axis shows "Sales (thousands USD)" ranging from 0 to 120. A legend in
+    the upper left explains "Median with 10th-90th percentile". The grid is subtle
+    with dashed lines and low opacity.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (s=15) and error bars (elinewidth=3, capsize=10) are well-sized
+          and visible; slightly larger markers could improve visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Month" and "Sales (thousands USD)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid has alpha=0.3 which is appropriate; however the legend text
+          "Median with 10th-90th percentile" is slightly cut-off/cramped
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (categorical), Y=sales values with asymmetric errors correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has visible caps, legend explaining bounds, asymmetric error magnitudes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-120 shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes "10th-90th percentile"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear asymmetry (upper errors larger than lower), variety in
+          error magnitudes; could show more dramatic asymmetry in some points
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales performance is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 42-82 thousands USD are realistic for business data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses standard errorbar function correctly but fill_between is a nice
+          touch; could leverage more matplotlib-specific features like custom error
+          bar styling or annotations
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/plotly.yaml b/plots/errorbar-asymmetric/metadata/plotly.yaml
index 95bc2a7627..663e9bf8c6 100644
--- a/plots/errorbar-asymmetric/metadata/plotly.yaml
+++ b/plots/errorbar-asymmetric/metadata/plotly.yaml
@@ -26,3 +26,181 @@ review:
     data
   - Could leverage Plotly hover templates to show exact upper/lower error values on
     hover
+  image_description: The plot displays 6 products (A through F) on the x-axis with
+    their quarterly sales values on the y-axis (ranging 0-140 thousands USD). Each
+    product is represented by a blue circular marker (#306998) with asymmetric vertical
+    error bars extending above and below. The error bars have clear horizontal caps
+    at both ends. The title "errorbar-asymmetric · plotly · pyplots.ai" is centered
+    at the top. A legend in the upper-left corner explains "Median Sales (10th-90th
+    percentile)". An annotation in the lower-right states "Error bars show 10th-90th
+    percentile range". The background is white with subtle gray gridlines. The asymmetry
+    is clearly visible - for example, Product E shows a much larger upper error bar
+    (~35) than lower (~18), while Product C shows a larger lower error bar (~15) than
+    upper (~10).
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, ticks at 20pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 18 is appropriate for 6 data points, error bar thickness
+          of 3 and width of 12 for caps are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Quarterly Sales (thousands USD)", X-axis "Product
+          Category" is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle (alpha 0.1), but legend placement could
+          be improved - it's positioned at the very top-left corner which feels slightly
+          disconnected
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (categories), Y (central values), error_lower and error_upper correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has visible caps on error bars, legend explaining bounds, annotation
+          describing the intervals
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [0, 150] shows all data clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes "Median Sales (10th-90th percentile)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows clear asymmetry: some products have larger upper errors (right-skewed
+          like Product E), others have larger lower errors (Product C), demonstrating
+          the full range of asymmetric error bar use cases'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Product sales with percentile-based confidence intervals is plausible,
+          though generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in tens of thousands USD with reasonable error ranges
+          are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but code uses relative path without plots directory
+          context
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with error_y for asymmetric errors, which is standard
+          Plotly. Also generates HTML for interactivity. Could have used hover templates
+          for enhanced interactivity.
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/plotnine.yaml b/plots/errorbar-asymmetric/metadata/plotnine.yaml
index b31509d5cf..d72db52942 100644
--- a/plots/errorbar-asymmetric/metadata/plotnine.yaml
+++ b/plots/errorbar-asymmetric/metadata/plotnine.yaml
@@ -23,3 +23,172 @@ review:
     in some with larger upside would better demonstrate the concept
   - 'Grid/legend scoring: caption is good but a proper legend entry could enhance
     clarity'
+  image_description: 'The plot displays 6 quarterly data points (Q1 2024 through Q2
+    2025) on a clean white background with a subtle gray grid. Each point is marked
+    with a solid blue circle (#306998) with asymmetric error bars extending above
+    and below. The error bars have horizontal caps at both ends. The downside error
+    bars are visibly larger than the upside bars, correctly representing conservative
+    financial projections with greater downside risk. The title "errorbar-asymmetric
+    · plotnine · pyplots.ai" is centered at the top. The y-axis is labeled "Revenue
+    (Million USD)" and the x-axis is labeled "Quarter". A caption at the bottom right
+    explains: "Error bars show 10th-90th percentile forecast range".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title ~24pt, axis labels ~20pt, tick labels ~16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points and error bars are well-sized, caps are visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but some unused space on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "(Million USD)", X-axis is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but no legend present (caption substitutes
+          but not ideal)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=quarters, Y=revenue, error bounds correctly computed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Asymmetric errors, caps visible, annotation explaining bounds
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible with appropriate range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Caption explains error bar meaning as spec recommends
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-asymmetric · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows asymmetric errors clearly, but all bars follow same pattern
+          (downside > upside). More variation would demonstrate the concept better
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial forecasting with conservative projections is a real, neutral
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values (105-175M USD) are realistic for quarterly projections
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (aes, geom_errorbar, geom_point, theme_minimal,
+          theme customization), but no advanced plotnine-specific features like faceting
+          or scales
+  verdict: APPROVED
diff --git a/plots/errorbar-asymmetric/metadata/seaborn.yaml b/plots/errorbar-asymmetric/metadata/seaborn.yaml
index 4cc50f4c4e..a434adf232 100644
--- a/plots/errorbar-asymmetric/metadata/seaborn.yaml
+++ b/plots/errorbar-asymmetric/metadata/seaborn.yaml
@@ -25,3 +25,176 @@ review:
     might integrate better with the plot
   - Relies heavily on matplotlib for the core errorbar functionality rather than showcasing
     seaborn-specific features
+  image_description: The plot displays an asymmetric error bar chart showing battery
+    life measurements across 8 device models (Model A through Model H). The plot uses
+    a consistent blue color (#306998) for both the scatter points and error bars.
+    Each data point is represented by a large circular marker with vertical error
+    bars extending asymmetrically above and below. The error bars have visible caps
+    at both ends. The title follows the correct format "errorbar-asymmetric · seaborn
+    · pyplots.ai" at the top. The x-axis shows "Device Model" with 8 categorical labels,
+    and the y-axis shows "Battery Life (hours)" ranging from approximately 6 to 18.
+    A subtle grid with dashed lines is visible in the background. An annotation box
+    in the bottom-right corner explains "10th–90th percentile". The layout is clean
+    with good use of whitespace and the whitegrid seaborn style.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (s=400) for 8 data points, error bars
+          thick and visible with proper caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Battery Life (hours)" includes units, "Device Model" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is good (alpha=0.3, dashed), but the annotation could be more
+          prominently placed or use a proper legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct asymmetric error bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=categories, Y=central values, errors correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Asymmetric error bars with caps, annotation explaining bounds
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotation correctly explains "10th–90th percentile"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "errorbar-asymmetric · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variety: different central values, clearly asymmetric errors
+          (some with larger upper, some with larger lower)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Battery life comparison is plausible and neutral, though slightly
+          generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Battery life values (6-18 hours) are realistic for devices
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: While sns.scatterplot is used for the points, the core asymmetric
+          errorbar functionality relies entirely on matplotlib's errorbar(). Seaborn
+          doesn't have native asymmetric error bar support, so this is an acceptable
+          compromise, but it doesn't showcase distinctive seaborn features.
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/altair.yaml b/plots/errorbar-basic/metadata/altair.yaml
index 975c9651ee..402695f855 100644
--- a/plots/errorbar-basic/metadata/altair.yaml
+++ b/plots/errorbar-basic/metadata/altair.yaml
@@ -24,3 +24,176 @@ review:
   - Y-axis starting at 0 creates unnecessary whitespace at bottom since all values
     are above 20
   - No tooltip added for interactivity (Altair strength not fully utilized)
+  image_description: The plot displays a basic error bar visualization with 6 experimental
+    groups (Control, Treatment A through E) on the x-axis and "Response Value (units)"
+    on the y-axis ranging from 0 to 54. Each data point is shown as a blue circle
+    (#306998) with vertical error bars extending above and below. The error bars have
+    visible caps (horizontal tick marks) at both ends. The title "errorbar-basic ·
+    altair · pyplots.ai" appears at the top in a clear, readable font. The y-axis
+    has a subtle dashed grid. Notably, Treatment C and Treatment D show asymmetric
+    error bars - Treatment C has a larger lower error than upper, while Treatment
+    D has a larger upper error than lower. All text is clearly legible with appropriate
+    font sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels well-spaced with labelAngle=0
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points sized at 300, error bars with strokeWidth=3, caps clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), high contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though starting y-axis at 0 creates some unused
+          space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Experimental Group" and "Response Value (units)" - descriptive
+          with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (gridOpacity=0.3, dashed), but no legend present (not
+          strictly needed for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot with points, bars, and caps
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis with error ranges
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has caps, consistent widths, asymmetric errors demonstrated
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis domain [0, 55] shows all data with room for error bars
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "errorbar-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows symmetric (Control, Treatment E) AND asymmetric (Treatment
+          C, D) error bars; good variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Experimental treatment comparison is a classic use case for error
+          bars
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for a treatment response; could be more specific
+          to a real domain
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot elements → layer → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair 6.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair layering (mark_circle, mark_rule, mark_tick),
+          declarative encoding. Could leverage tooltips or selections for interactivity.
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/bokeh.yaml b/plots/errorbar-basic/metadata/bokeh.yaml
index 7fded2e4f9..ea76e56bb0 100644
--- a/plots/errorbar-basic/metadata/bokeh.yaml
+++ b/plots/errorbar-basic/metadata/bokeh.yaml
@@ -24,3 +24,174 @@ review:
   - Text sizes could be slightly larger for the 4800x2700 canvas (title 36pt is good,
     but axis labels at 28pt could be 32pt)
   - The Bokeh toolbar in the corner adds visual clutter to the static PNG output
+  image_description: The plot displays an error bar chart with 6 experimental groups
+    (Control, Treatment A, B, C, D, E) along the x-axis and "Response Value (units)"
+    on the y-axis ranging from 0 to ~55. Each data point is represented by a blue
+    circle with vertical error bars extending above and below. The error bars have
+    visible caps (horizontal lines at the ends). Treatment D shows the highest mean
+    value (~48), while Control has the lowest (~25). Treatment C notably shows asymmetric
+    error bars with a larger lower uncertainty. The color scheme uses a consistent
+    blue (#306998) throughout. The title "errorbar-basic · bokeh · pyplots.ai" appears
+    at the top. A subtle dashed horizontal grid helps with readability. There is a
+    Bokeh logo/toolbar visible in the top-right corner.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable, though slightly
+          small relative to the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Error bars and markers are appropriately sized for the 6 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is accessible, no color differentiation needed
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but Bokeh toolbar in corner adds visual noise
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Value (units)" and "Experimental
+          Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but no legend is present (none is needed)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, numeric values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with visible caps, consistent widths, asymmetric errors
+          demonstrated
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with proper padding (starts at 0, ends above
+          max)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows asymmetric errors (Treatment C, D), varying means, but could
+          show more dramatic differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible experimental/clinical trial scenario with control and treatment
+          groups
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for a generic response variable
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also creates plot.html (acceptable for Bokeh)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Whisker with TeeHead for caps, ColumnDataSource - good Bokeh
+          patterns but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/highcharts.yaml b/plots/errorbar-basic/metadata/highcharts.yaml
index 6dc0406db7..1174926453 100644
--- a/plots/errorbar-basic/metadata/highcharts.yaml
+++ b/plots/errorbar-basic/metadata/highcharts.yaml
@@ -25,3 +25,173 @@ review:
     or not rendering properly
   - Axis labels lack units (e.g. Measured Value units would be more informative)
   - Only demonstrates symmetric error bars; spec mentions asymmetric errors as a feature
+  image_description: The plot shows a bar chart with error bars displaying experimental
+    measurements for 6 samples (Sample A through Sample F). Blue column bars (#306998)
+    represent the mean values ranging from approximately 35 to 52 on the y-axis. Black
+    error bars with visible caps (whiskers) extend above and below each bar, showing
+    the error ranges. The title "errorbar-basic · highcharts · pyplots.ai" is prominently
+    displayed at the top in bold. A subtitle reads "Experimental measurements with
+    error ranges". The x-axis is labeled "Experimental Samples" and the y-axis is
+    labeled "Measured Value". A legend in the top-right shows "Mean Value" with a
+    blue circle indicator. The grid uses dashed gray lines. The layout is clean with
+    good use of canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 64px, axis titles at 42px, labels at 32px - all highly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars and error bars are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) with black error bars - colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: false
+        comment: Dashed grid is subtle, legend well positioned but only shows "Mean
+          Value" (Error Range not visible in legend)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot with column bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y with error bars
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars have visible caps, consistent widths
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 25, shows all data clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: false
+        comment: Legend shows "Mean Value" but Error Range legend entry not visible
+          in final image
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows symmetric error bars with varying magnitudes; could show asymmetric
+          errors too per spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Experimental samples with measurements is a realistic scientific
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 35-52 with errors 2.8-5.8 are plausible but generic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear script with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded); no random seed is needed since
+          there is no random data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Highcharts' native errorbar series type with proper whisker/stem
+          styling, column series, and interactive HTML output
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/letsplot.yaml b/plots/errorbar-basic/metadata/letsplot.yaml
index d29697b11e..1a835cdab6 100644
--- a/plots/errorbar-basic/metadata/letsplot.yaml
+++ b/plots/errorbar-basic/metadata/letsplot.yaml
@@ -23,3 +23,176 @@ review:
   - Axis labels lack units (e.g., Measured Value (units) would be better)
   - Could demonstrate asymmetric error bars to show full feature coverage as mentioned
     in spec
+  image_description: The plot displays a basic error bar chart with 5 experimental
+    groups (Control, Treatment A, Treatment B, Treatment C, Treatment D) on the x-axis
+    and "Measured Value" on the y-axis (range ~40-68). Each group shows a central
+    blue point marker with vertical error bars extending above and below, featuring
+    horizontal caps at both ends. The colors are a consistent blue (#306998) for both
+    points and error bars. The title correctly displays "errorbar-basic · letsplot
+    · pyplots.ai" at the top. The background is clean white with subtle gray grid
+    lines, using a minimal theme. The data shows variation across groups with Treatment
+    B having the highest mean (~61) and largest error range, while Control has the
+    lowest mean (~45).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Error bars and points are appropriately sized for the 5 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Experimental Group", "Measured Value") but
+          lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate; no legend needed for single series,
+          but grid lines are visible (acceptable)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (categories) and Y (values) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with caps present, symmetric errors shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (40-68) shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varying error magnitudes and mean values, but all errors are
+          symmetric (spec mentions asymmetric as an option)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Experimental treatment data is a perfect real-world scenario for
+          error bars
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in 40-68 range with errors of 4-7 are realistic for experimental
+          measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but lacks a comment clarifying
+          that no random seed is needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot2-style grammar correctly with geom_errorbar and geom_point,
+          but doesn't leverage advanced lets-plot features like tooltips or interactive
+          elements
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/matplotlib.yaml b/plots/errorbar-basic/metadata/matplotlib.yaml
index 97380970ad..0529136264 100644
--- a/plots/errorbar-basic/metadata/matplotlib.yaml
+++ b/plots/errorbar-basic/metadata/matplotlib.yaml
@@ -24,3 +24,159 @@ review:
   - Could showcase more matplotlib-specific features like error bar color gradients
     or connecting lines between points
   - Y-axis only grid is acceptable but full grid could enhance readability
+  image_description: The plot displays 6 experimental groups (Control, Treatment A
+    through E) on the x-axis with their response values on the y-axis (ranging from
+    0 to ~55 units). Each group is represented by a blue circular marker (#306998
+    color) with vertical error bars extending above and below. The error bars have
+    visible caps at both ends. Treatment C and D show asymmetric error bars - Treatment
+    C has a notably larger lower error bar, while Treatment D has a larger upper range.
+    The title "errorbar-basic · matplotlib · pyplots.ai" is displayed at the top.
+    The y-axis is labeled "Response Value (units)" and x-axis is labeled "Experimental
+    Group". A subtle horizontal dashed grid (alpha=0.3) aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all category labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (size=15) and error bars (linewidth=3, capsize=10) are well-sized
+          for 6 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, no colorblind concerns
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of 16:9 aspect
+          ratio
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(units)", X-axis is descriptive "Experimental Group"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, errors correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has caps (capsize=10), consistent widths, asymmetric errors demonstrated
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0 to max+15% padding, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "errorbar-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows asymmetric errors (Treatment C, D) and symmetric errors (others),
+          but could show more variety in error magnitudes (-2)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical/experimental trial context with Control and Treatment groups
+          is highly realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 25-48 with errors 2-6.5 are plausible for experimental measurements
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/plotly.yaml b/plots/errorbar-basic/metadata/plotly.yaml
index 33c726dab8..8e4a9683e0 100644
--- a/plots/errorbar-basic/metadata/plotly.yaml
+++ b/plots/errorbar-basic/metadata/plotly.yaml
@@ -27,3 +27,176 @@ review:
     in top-left)
   - Could leverage Plotly-specific features like custom hover templates showing exact
     values
+  image_description: The plot displays a basic error bar chart with 6 experimental
+    groups (Control, Treatment A through E) on the x-axis and "Response Value (units)"
+    on the y-axis ranging from 0 to 90. Each data point is shown as a large blue circular
+    marker (#306998 color) with vertical error bars extending above and below. The
+    error bars have clearly visible horizontal caps at both ends. The title "errorbar-basic
+    · plotly · pyplots.ai" is centered at the top. A legend labeled "Mean ± SE" appears
+    in the top-left corner. The background is white with subtle horizontal gridlines.
+    All text is legible and appropriately sized.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels all clearly readable at the target
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (size=28), error bars clearly visible
+          with thickness=4 and width=16 caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Response Value (units)" with units; X-axis has descriptive
+          "Experimental Group"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and good, but legend text could benefit from units
+          clarification
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X mapped to categories, Y to means, error to error bars
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with visible caps, consistent widths, clear error range
+          markers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [0, 90] shows all data points and error bars completely
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Mean ± SE"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `{spec-id} · {library} · pyplots.ai`
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varying error magnitudes (3.6 to 9.3) demonstrating different
+          precision levels; however, only symmetric errors shown (spec mentions asymmetric
+          errors as a consideration)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Experimental/clinical trial scenario with Control and Treatment groups
+          is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for a generic response metric; could be more
+          specific to a real-world domain
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with error_y which is standard Plotly; also generates
+          interactive HTML output which is a Plotly strength. However, could leverage
+          more Plotly-specific features like hover templates or animations
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/plotnine.yaml b/plots/errorbar-basic/metadata/plotnine.yaml
index f2eee9a204..b65d8d88fd 100644
--- a/plots/errorbar-basic/metadata/plotnine.yaml
+++ b/plots/errorbar-basic/metadata/plotnine.yaml
@@ -21,3 +21,173 @@ review:
   weaknesses:
   - Missing grid lines would improve readability (add panel_grid with subtle alpha)
   - Axis labels lack units (e.g., Measurement Value (units) would be more informative)
+  image_description: The plot displays a basic error bar chart with 6 experimental
+    samples (Sample A through F) on the x-axis and Measurement Value on the y-axis
+    (ranging from ~30 to ~58). Each sample is represented by a blue point (#306998
+    color) with vertical error bars extending above and below. The error bars have
+    visible horizontal caps at both ends. The title "errorbar-basic · plotnine · pyplots.ai"
+    is displayed at the top. The plot uses a clean minimal theme with a light gray
+    background and subtle grid lines. All text is clearly legible with good font sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes (24pt title, 20pt axis labels, 16pt tick labels)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points (size=6) and error bars (size=1.5) are clearly visible and
+          well-proportioned for 6 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins, plot fills appropriate
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("Experiment", "Measurement Value") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid visible (theme_minimal removes it), which reduces readability
+          slightly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot with points and error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (categorical samples) and Y (measurement values) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with caps, consistent widths, symmetric errors - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in measurements and error sizes, but all errors are
+          symmetric (spec mentions asymmetric as a possibility)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Lab measurements with uncertainty is a realistic scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in 30-55 range with errors of 2.8-5.5 are plausible for lab
+          measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but missing bbox_inches equivalent
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with geom_errorbar and geom_point,
+          theme_minimal, but doesn't leverage advanced plotnine features like faceting
+          or scale customization
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/pygal.yaml b/plots/errorbar-basic/metadata/pygal.yaml
index 1364514db3..bd794cf17d 100644
--- a/plots/errorbar-basic/metadata/pygal.yaml
+++ b/plots/errorbar-basic/metadata/pygal.yaml
@@ -25,3 +25,185 @@ review:
   - Error bar caps could be slightly more prominent for better visibility
   - Does not leverage pygal interactive features (tooltips showing exact values would
     enhance the visualization)
+  image_description: The plot displays a basic error bar chart with 6 experimental
+    groups (Control, Treatment A through E) on the x-axis and Response Value (units)
+    on the y-axis ranging from 0 to ~55. Each data point is represented by a blue
+    (#306998) filled circle with vertical error bars extending above and below, terminated
+    by horizontal caps. The title "errorbar-basic · pygal · pyplots.ai" appears at
+    the top. The background is white with subtle horizontal grid lines. Treatment
+    C and D show notably asymmetric error bars as intended by the data. The layout
+    is clean with good use of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all readable. Font sizes
+          are appropriately scaled for the 4800x2700 canvas. Slight deduction as tick
+          labels could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated and
+          readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points (dots_size=22) and error bars (stroke_width=5) are well-sized
+          and clearly visible. Error bar caps are appropriately proportioned.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast against white background.
+          No colorblind concerns.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with data occupying the upper portion. Slight
+          imbalance as y-axis starts at 0 but data ranges from ~23-55, leaving lower
+          portion empty.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Response Value (units)" with units; X-axis has "Experimental
+          Group" which is descriptive.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Horizontal grid lines are subtle and helpful. No legend shown (appropriate
+          since single series), but the legend is disabled rather than elegantly handled.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot implementation using XY chart with manual
+          error bar construction.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to x-axis, means to y-axis positions.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Error bars with caps present, asymmetric errors supported. Spec mentions
+          "visible caps" which are implemented, though caps could be slightly more
+          prominent.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points and error bars fully visible within the chart area.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden for single series display.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "errorbar-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both symmetric (Control, Treatment E) and asymmetric error
+          bars (Treatment C, D). Good variety in mean values. Could show more dramatic
+          asymmetry to better demonstrate the feature.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Experimental treatment data with control group is a classic, realistic
+          scientific context.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in 25-50 range are plausible for generic "response values."
+          Errors of 2-6 units are reasonable. The context is somewhat generic.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → chart → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values), no random seed needed.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (appropriate for pygal).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart and custom Style effectively. Creative manual
+          construction of error bars using multiple series. However, pygal doesn't
+          have native error bar support, so this is a workaround rather than leveraging
+          distinctive pygal features like interactivity or tooltips.
+  verdict: APPROVED
diff --git a/plots/errorbar-basic/metadata/seaborn.yaml b/plots/errorbar-basic/metadata/seaborn.yaml
index 850945eb6e..5f69637714 100644
--- a/plots/errorbar-basic/metadata/seaborn.yaml
+++ b/plots/errorbar-basic/metadata/seaborn.yaml
@@ -24,3 +24,171 @@ review:
   - Error bars added via matplotlib errorbar() instead of seaborn native errorbar
     parameter in barplot
   - Axis label uses generic units placeholder instead of specific measurement units
+  image_description: The plot displays a bar chart showing 6 treatment groups (Control,
+    Treatment A, B, C, D, E) on the x-axis with Response Value (units) on the y-axis
+    ranging from 0 to approximately 75. Each bar has prominent black error bars with
+    visible caps (capsize=8). The bars alternate between Python blue (#306998) and
+    Python yellow (#FFD43B) colors. The title correctly shows "errorbar-basic · seaborn
+    · pyplots.ai". Text is clearly legible with appropriate font sizes. The layout
+    uses a white grid background with subtle dashed gridlines on the y-axis only.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well sized, error bars clearly visible with proper caps and
+          thickness
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow alternating scheme is colorblind-friendly, but the alternating
+          pattern has no semantic meaning
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Has a units label "(units)", but it is a generic placeholder rather
+          than specific units like mg/dL or %
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend explaining the color alternation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct error bar plot using bars with error bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=categories, Y=means, error bars correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Error bars with visible caps present as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data including error bar extensions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this single-series plot (colors are decorative)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: errorbar-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows error bars clearly, but only symmetric errors (spec mentions
+          asymmetric as a possibility)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Treatment groups with response values is a realistic clinical/experimental
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 43-61 with errors 4.5-7.1 are realistic for experimental measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png', but sns.set_style is called after figure creation
+          (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn's barplot with proper hue/palette API
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.barplot correctly, but error bars added via matplotlib rather
+          than seaborn's native ci/errorbar parameter
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/altair.yaml b/plots/facet-grid/metadata/altair.yaml
index d81d8952f7..4edc1dbef7 100644
--- a/plots/facet-grid/metadata/altair.yaml
+++ b/plots/facet-grid/metadata/altair.yaml
@@ -26,3 +26,178 @@ review:
     the plot area
   - Could benefit from interactive selections (Altair's distinctive feature) to highlight
     crop types across all facets
+  image_description: 'The plot displays a 3×3 faceted grid of scatter plots showing
+    crop yield vs water usage. The grid is organized with Season (Spring, Summer,
+    Fall) as columns and Soil Type (Sandy, Clay, Loam) as rows. Each subplot contains
+    scatter points colored by crop type: blue for Wheat, yellow for Corn, and green
+    for Soybean. The title "facet-grid · altair · pyplots.ai" appears at the top left.
+    Axis labels show "Water Usage (mm)" on x-axis (range ~40-95) and "Yield (tons/ha)"
+    on y-axis (range ~5-13). A color legend for Crop appears on the right side. The
+    grid lines are subtle gray, and the overall layout is clean with good spacing
+    between panels.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, axis labels, and tick marks are appropriately
+          sized, though axis title font could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere in the visualization
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized with good opacity (0.7); slightly crowded
+          in some cells but still distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; the faceted grid fills the area well, though
+          legend is positioned slightly away from the main plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Yield (tons/ha)", "Water Usage (mm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend is clear but positioned at the edge
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid with scatter plots in each cell
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Water Usage) and Y (Yield) correctly assigned with two categorical
+          faceting variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: faceting by row (Soil Type) and column
+          (Season), shared axes, scatter as base plot'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Crop legend correctly shows all three categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "facet-grid · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across soil types (Loam yields higher), seasons (Summer
+          slightly higher), and crops; demonstrates the multi-dimensional exploration
+          purpose well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Crop yield vs water usage across soil types and seasons is a realistic
+          agricultural scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Yield values (5-13 tons/ha) and water usage (40-95 mm) are plausible,
+          though water usage range is on the lower end for seasonal measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html, which is fine, but a minor deduction
+          applies for not following the exact single-output convention
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative faceting with .facet(), Column/Row
+          headers, encoding types (:Q, :N), and tooltips. Could leverage more Altair-specific
+          features like interactive selections or conditional encoding
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/bokeh.yaml b/plots/facet-grid/metadata/bokeh.yaml
index dbb47487ec..a5e933d82e 100644
--- a/plots/facet-grid/metadata/bokeh.yaml
+++ b/plots/facet-grid/metadata/bokeh.yaml
@@ -24,3 +24,178 @@ review:
     artifacts appear in subplot corners
   - No use of Bokeh distinctive interactive features (hover tooltips, linked brushing,
     pan/zoom tools)
+  image_description: The plot displays a 4×3 faceted grid showing the relationship
+    between Marketing Spend ($K) and Sales ($K) across three regions (North, South,
+    East) and four seasons (Spring, Summer, Fall, Winter). Each subplot contains approximately
+    25 scatter points. Colors are blue (#306998) for North, orange (#E69F00) for South,
+    and teal (#4ECDC4) for East. The title "facet-grid · bokeh · pyplots.ai" appears
+    at the top center. Facet labels (e.g., "North · Spring") appear in the top-left
+    of each cell. The grid uses dashed gray lines with subtle opacity. Y-axis labels
+    "Sales ($K)" appear only on the leftmost column, and X-axis labels "Marketing
+    Spend ($K)" appear only on the bottom row. There are small colored circular artifacts
+    visible in the top-right corner of each subplot.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 32pt, axis labels 20pt, tick labels 16pt, facet labels 18pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers sized at 18 with 0.7 alpha, appropriate for ~25 points per
+          cell. Minor: some markers at edges are slightly clipped'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/orange/teal palette is colorblind-safe (no red-green conflict)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good grid layout filling the canvas. Minor: the legend at the bottom
+          is HTML-based and doesn''t render in the PNG export'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Marketing Spend ($K)", "Sales ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but the HTML legend div does not render
+          in PNG export - only small colored dots appear in corners as artifacts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid with scatter plots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Marketing spend on X, Sales on Y, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Faceting by two categorical variables (region, season), shared axes
+          scales, facet labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges (5-50 X, 20-130 Y)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Region-color mapping is accurate (attempted via HTML div)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "facet-grid · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across regions (different base values) and seasons
+          (seasonal adjustments create visible patterns)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Product performance across regions/seasons is a plausible business
+          scenario, though somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Marketing spend in $K (10-40 range) and sales in $K (40-120 range)
+          are realistic business metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data generation → plot creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, bokeh modules)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic scatter plots with ColumnDataSource. Does not leverage
+          Bokeh's distinctive features like hover tooltips, linked brushing, or interactive
+          tools. The legend is implemented via HTML Div which doesn't render in static
+          PNG export.
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/highcharts.yaml b/plots/facet-grid/metadata/highcharts.yaml
index 724775cdae..4d05ce7083 100644
--- a/plots/facet-grid/metadata/highcharts.yaml
+++ b/plots/facet-grid/metadata/highcharts.yaml
@@ -25,3 +25,173 @@ review:
   - Y-axis label shows 'Values' instead of 'Growth (cm)' in middle and right columns
     (inconsistent labeling)
   - Text sizes could be larger for optimal readability at 4800×2700 resolution
+  image_description: 'The plot displays a 3×3 faceted grid of scatter plots showing
+    "Plant Growth Study". The columns represent light conditions (Low, Medium, High)
+    with bold labels at the top. The rows represent soil types (Sandy, Loamy, Clay)
+    labeled on the right side. Each subplot shows scatter points with Water (mm) on
+    X-axis (range 15-105) and Growth (cm) on Y-axis (range 0-55). Colors are colorblind-safe:
+    Blue (#306998) for Sandy row, Yellow (#FFD43B) for Loamy row, and Purple (#9467BD)
+    for Clay row. Points show clear positive correlation between water and growth,
+    with higher growth values in High light conditions. The main title uses the correct
+    format "facet-grid · highcharts · pyplots.ai". A subtitle explains the faceting
+    structure.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Text is readable but could be slightly larger for optimal 4800×2700
+          display
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers well-sized (radius 10) with dark borders for 25 points per
+          facet
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with row labels on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis shows "Growth (cm)" but middle/right columns show "Values"
+          label (bug)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Dashed grid is subtle, legend disabled as expected for faceted plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid with scatter subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Water, Y=Growth correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Faceting by row (soil) and column (light), shared scales, category
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, facet labels accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct: "Plant Growth Study · facet-grid · highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation across all 9 facets with different patterns per soil/light
+          combo
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth study is a neutral, scientifically plausible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Water 20-100mm, Growth 0-50cm are realistic values
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports used, though some like urllib could be abstracted
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Unknown library version
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts multi-axis capability and annotations module well,
+          but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/letsplot.yaml b/plots/facet-grid/metadata/letsplot.yaml
index 73072f24c9..b3418fe68d 100644
--- a/plots/facet-grid/metadata/letsplot.yaml
+++ b/plots/facet-grid/metadata/letsplot.yaml
@@ -22,3 +22,173 @@ review:
   - Color palette could be more colorblind-friendly (yellow/red proximity)
   - Data generation uses for-loop which is less elegant than vectorized numpy/pandas
     operations
+  image_description: The plot displays a 3x4 faceted grid showing the relationship
+    between Unit Price ($) and Units Sold for a sales analysis scenario. The grid
+    is faceted by Category (Clothing, Electronics, Home) on rows and Region (East,
+    North, South, West) on columns. Each panel contains scatter points with linear
+    regression trend lines. Colors are blue (#306998) for Electronics, yellow (#FFD43B)
+    for Clothing, and red (#DC2626) for Home. The title "facet-grid · letsplot · pyplots.ai"
+    appears at the top. Strip labels on the right show Category names, and column
+    headers at the top show Region names. All panels show an inverse relationship
+    between price and units sold, with trend lines clearly visible.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Strip text
+          could be slightly larger but is acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with alpha=0.7 for 25 points per panel, trend
+          lines visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Yellow and red may be difficult for some colorblind users; blue is
+          clearly distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Unit Price ($)" and "Units Sold"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean minimal theme, legend well-positioned on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid implementation with scatter + trend
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Price, Y=Units Sold correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Faceting by both row and column variables, shared axes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "facet-grid · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across categories and regions with different base
+          prices and region factors
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Sales analysis scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Price ranges and unit counts are plausible for retail data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses `strict=True` in zip which is fine, but uses for-loop data construction
+          instead of vectorized approach
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot's ggplot2 grammar, facet_grid, geom_smooth, theme_minimal.
+          Could leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/matplotlib.yaml b/plots/facet-grid/metadata/matplotlib.yaml
index 610f5c759a..c8e6a73540 100644
--- a/plots/facet-grid/metadata/matplotlib.yaml
+++ b/plots/facet-grid/metadata/matplotlib.yaml
@@ -29,3 +29,180 @@ review:
     may clip them slightly
   - South summer temperatures reaching ~50°C are on the extreme end for realistic
     data
+  image_description: 'The plot displays a 3×4 grid of scatter plots showing Energy
+    Consumption (kWh) vs Temperature (°C). The columns represent seasons (Spring,
+    Summer, Fall, Winter) and rows represent regions (North, South, East). Each region
+    uses a distinct color: North is blue (#306998), South is yellow (#FFD43B), and
+    East is green (#4DAF4A). The main title "facet-grid · matplotlib · pyplots.ai"
+    appears at the top. Row labels (North, South, East) are positioned on the right
+    side of each row. All subplots share the same axes scales, with temperature ranging
+    from approximately -10 to 50°C and energy consumption from ~100 to 450 kWh. Subtle
+    dashed grid lines are present in each subplot. The relationship between temperature
+    and energy consumption varies by season and region, showing realistic patterns
+    where extreme temperatures lead to higher energy consumption.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, facet headers at 18pt, tick labels
+          at 12pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; facet labels well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized at s=120 with alpha=0.7, appropriate for 25 points
+          per facet; white edges provide good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight crowding of row labels against right
+          edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Energy Consumption (kWh)" include units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed); no legend needed as colors identify
+          rows
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid with scatter plots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Temperature, Y=Energy correctly mapped; faceted by Region (rows)
+          and Season (columns)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two-way faceting, shared axes, scatter base plots, category labels
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Row and column labels correctly identify facets
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "facet-grid · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across regions and seasons; different patterns visible
+          (U-shaped energy vs temp relationship)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature vs Energy consumption is a real, comprehensible scenario
+          with region/season variation
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Most values realistic; some South summer temperatures reaching ~50°C
+          are quite high
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern matplotlib API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but library guidelines suggest absolute path
+          or consistent naming
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plt.subplots with sharex/sharey, fig.supxlabel/supylabel,
+          axes array indexing; could leverage matplotlib's GridSpec for more advanced
+          layout control
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/plotly.yaml b/plots/facet-grid/metadata/plotly.yaml
index 7a6a4cb0cf..973b034147 100644
--- a/plots/facet-grid/metadata/plotly.yaml
+++ b/plots/facet-grid/metadata/plotly.yaml
@@ -27,3 +27,180 @@ review:
     ~15 points per facet
   - Row facet labels are displayed vertically which reduces readability compared to
     horizontal text
+  image_description: The plot displays a 4×4 faceted grid layout showing the relationship
+    between Marketing Spend ($K) and Sales ($K). The columns represent quarters (Q1,
+    Q2, Q3, Q4) and the rows represent regions (North, South, East, West). Each subplot
+    contains approximately 15 scatter points. The color scheme uses blue (#306998)
+    for all data points consistently across all facets. A legend appears on the right
+    side showing region colors (North=blue, South=yellow, East=blue, West=yellow),
+    though this appears to be an artifact from Plotly's automatic color mapping rather
+    than the code's explicit single-color setting. The row labels are displayed vertically
+    on the right side in format "Region=North", etc. Column labels at the top show
+    "Quarter=Q1", etc. The main title "facet-grid · plotly · pyplots.ai" is prominently
+    displayed at the top center. Axes show Marketing Spend ($K) on x-axis (10-50 range)
+    and Sales ($K) on y-axis (60-140 range). Grid lines are subtle with light gray
+    color.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and clear at 32pt, axis labels at 20pt, tick labels
+          at 16pt; facet annotations at 18pt are readable but slightly small for the
+          annotation text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size=12 with 0.8 opacity are visible but could be slightly
+          larger given 15 points per facet
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is accessible; no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with 4×4 grid; margins are reasonable but
+          legend takes some space redundantly
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Marketing Spend ($K)", "Sales ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at 0.1 alpha (good), but legend showing regions is
+          redundant since regions are already encoded by row facets
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid plot with scatter subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped to Marketing Spend and Sales
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Facets by both row (Region) and column (Quarter) as spec requires
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Facet labels correctly identify categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "facet-grid · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across regions and quarters with different base values;
+          could show more dramatic differences between facets
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Marketing spend vs sales is a realistic business scenario with plausible
+          relationships
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable ($10-50K marketing, $60-140K sales) though
+          the correlation is quite strong across all facets
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, plotly.express)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly Express API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses px.scatter with facet_row/facet_col which is Plotly Express's
+          strength; also outputs HTML for interactivity; could leverage more Plotly
+          features like hover data or trendlines
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/plotnine.yaml b/plots/facet-grid/metadata/plotnine.yaml
index 12ebca556c..3fbefde727 100644
--- a/plots/facet-grid/metadata/plotnine.yaml
+++ b/plots/facet-grid/metadata/plotnine.yaml
@@ -23,3 +23,176 @@ review:
   - Facet strip labels use label_both for clear identification of row/column variables
   weaknesses:
   - Y-axis label could be slightly larger for better readability at full resolution
+  image_description: 'The plot displays a 3x3 faceted grid of scatter plots showing
+    plant growth data. The columns represent light levels (Low, Medium, High) and
+    rows represent soil types (Sandy, Loamy, Clay). Each panel contains scatter points
+    for three plant species (Basil in mint green, Fern in orange, Tomato in blue/purple)
+    showing the relationship between Water (mL/day) on the x-axis (range 20-70) and
+    Growth (cm) on the y-axis (range 0-120). The title "facet-grid · plotnine · pyplots.ai"
+    is displayed at the top in bold. The legend for Species is positioned on the right
+    side. Facet labels use "label_both" format (e.g., "light_level: Low", "soil_type:
+    Sandy"). Colors are from the Set2 palette. The grid lines are subtle gray.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, axis titles 18pt, axis text 14pt, strip text
+          14pt bold - all clearly readable. Slightly below optimal for tick labels.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, facet labels are clear and well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points at size 3.5 with alpha 0.75 are visible, though some overlap
+          within panels due to data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Set2 palette is colorblind-safe with good color differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with 16:9 aspect ratio, slight imbalance
+          with legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Water (mL/day)" and
+          "Growth (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle with alpha 0.4, legend well-positioned but could be closer
+          to the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid plot type with 3x3 grid layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (water) and Y (growth) correctly mapped with categorical faceting
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: numeric x/y, row and column faceting
+          variables'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges, shared scales across panels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species legend correctly identifies all three plant types
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "facet-grid · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across soil types, light levels, and species; clear
+          trends visible in high light/clay conditions vs low light/sandy
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth study with water, soil, light, and species is a plausible
+          scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable (25-65 mL water, 0-120 cm growth), though some
+          extreme growth values seem high
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used, well-organized from plotnine
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses facet_grid with labeller, scale_color_brewer, theme customization
+          - good usage but could demonstrate more advanced plotnine features
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/pygal.yaml b/plots/facet-grid/metadata/pygal.yaml
index 578c6cdb62..3010b662de 100644
--- a/plots/facet-grid/metadata/pygal.yaml
+++ b/plots/facet-grid/metadata/pygal.yaml
@@ -26,3 +26,169 @@ review:
   - The faceted composition relies heavily on PIL rather than pygal native features
     - this is a reasonable workaround given pygal limitations
   - Row labels could be slightly larger for better visibility
+  image_description: The plot displays a 2×3 faceted grid showing plant growth data
+    (height vs days). The grid has two rows labeled "Sandy Soil" and "Clay Soil" (rotated
+    labels on left side), and three columns labeled "Low Light", "Medium Light", and
+    "High Light" at the top. Each cell contains a line chart with blue lines and circular
+    markers showing plant height (cm) on the y-axis (0-32 range) over days (0-30)
+    on the x-axis. The main title "facet-grid · pygal · pyplots.ai" is centered at
+    the top in dark gray. All subplots share consistent y-axis scales. The layout
+    is clean with light gray plot backgrounds and subtle horizontal grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable; font sizes are good but could
+          be slightly larger for optimal readability
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line strokes and markers are appropriately sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) used consistently, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight padding imbalance with row label
+          area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Height (cm)" and "Days"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend shown (though not strictly needed here), grid is subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid with line charts in each cell
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Days, Y=Height correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has row/column faceting, shared axes, facet labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-35 y-axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Row labels correctly identify soil types
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across conditions; different growth rates visible
+          across facets; could show more dramatic differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth experiment is an excellent, realistic scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Height values (0-32 cm over 30 days) are plausible for plant growth
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear flow but uses functions/imports for image composition
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal.Line with custom Style, but the multi-panel composition
+          is done via PIL rather than native pygal features
+  verdict: APPROVED
diff --git a/plots/facet-grid/metadata/seaborn.yaml b/plots/facet-grid/metadata/seaborn.yaml
index 6c43b6fa4e..60a080a897 100644
--- a/plots/facet-grid/metadata/seaborn.yaml
+++ b/plots/facet-grid/metadata/seaborn.yaml
@@ -24,3 +24,182 @@ review:
     on the left edge
   - Could benefit from regression lines (lmplot or regplot) to highlight the varying
     relationships across facets
+  image_description: The plot displays a 3×4 grid of scatter plots (12 subplots total).
+    Each subplot shows Marketing Spend ($k) on the x-axis (range 5-30) vs Sales Revenue
+    ($k) on the y-axis (range ~10-55). The columns are labeled Q1, Q2, Q3, Q4 (quarters)
+    at the top, and rows are labeled Region A, Region B, Region C on the right side.
+    All data points are rendered in a consistent blue color (#306998) with white edge
+    borders, slight transparency, and appear well-sized for the data density (25 points
+    per cell). The title "facet-grid · seaborn · pyplots.ai" appears centered at the
+    top in bold. Grid lines are subtle dashed lines. The data shows a positive correlation
+    between marketing spend and sales revenue, with intercepts increasing across quarters
+    and slopes varying by region.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, axis labels, and tick labels are appropriately
+          sized. Slight deduction as tick labels could be marginally larger for the
+          high-resolution output.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are well-sized (s=150) with good alpha (0.7) for 25 points
+          per panel.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe; no color distinction
+          needed.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout overall; the grid fills the canvas well, though the right-side
+          row labels feel slightly cramped.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Marketing Spend ($k)" and "Sales Revenue
+          ($k)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha=0.3. No legend needed
+          for single-color plot. However, axes are shared but Y-axis labels repeat
+          on every row unnecessarily.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted grid plot using seaborn's FacetGrid.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped; categorical faceting by row (Region) and column
+          (Quarter).
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has numeric X/Y, two categorical faceting variables, shared axes
+          scales.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; facet labels are accurate.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "facet-grid · seaborn · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across both faceting dimensions (regions have different
+          slopes, quarters have different intercepts). Could show more dramatic differences
+          between facets.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Marketing spend vs sales revenue is a realistic business scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable ($5k-30k spend, $10k-55k revenue), though relationship
+          could be more varied.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes; follows imports → data → plot → save structure.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and seaborn imported; all used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's FacetGrid which is a signature feature, but could
+          leverage additional seaborn features like regplot for regression lines or
+          color encoding with hue.
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/altair.yaml b/plots/forest-basic/metadata/altair.yaml
index 82095e1099..49cb1eee6f 100644
--- a/plots/forest-basic/metadata/altair.yaml
+++ b/plots/forest-basic/metadata/altair.yaml
@@ -26,3 +26,181 @@ review:
   - All studies show negative effects favoring treatment - having at least one study
     crossing the null line would better demonstrate forest plot interpretation
   - Could leverage more Altair-specific features like selection for highlighting
+  image_description: The plot displays a forest plot for a meta-analysis with 10 studies
+    (Taylor 2023 through Johnson 2018) and a pooled estimate at the bottom. Each study
+    is shown with a blue horizontal line representing the 95% confidence interval
+    and a blue circular point for the effect size. The marker sizes vary based on
+    study weight - larger circles for higher-weighted studies (e.g., Williams 2020,
+    Wilson 2022) and smaller circles for lower-weighted studies (e.g., Brown 2021).
+    A vertical dashed gray reference line at x=0 indicates the null effect. The pooled
+    estimate is displayed as a distinctive yellow diamond with a blue border. The
+    x-axis shows "Standardized Mean Difference (95% CI)" ranging from approximately
+    -1.20 to 0.20. Study names appear on the y-axis. Text annotations "← Favors Treatment"
+    and "Favors Control →" appear near the bottom. The title "forest-basic · altair
+    · pyplots.ai" is centered at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and study names are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all study names and labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Confidence interval lines are clearly visible with good stroke width;
+          point markers appropriately sized and weight-scaled
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) diamond provide excellent contrast;
+          no red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout but the plot area could use more of the canvas;
+          some empty space on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'X-axis has descriptive label with units: "Standardized Mean Difference
+          (95% CI)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle which is good, but "Favors Treatment" and "Favors
+          Control" annotations are partially cut off at the edges
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot with point estimates, confidence intervals, and
+          pooled diamond
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Effect sizes on X-axis, studies on Y-axis, correct CI representation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diamond for pooled estimate, vertical
+          reference line at null, marker sizes proportional to weight'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points and confidence intervals completely
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type; annotations explain direction
+          correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "forest-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety in effect sizes, CI widths, and weights; all negative
+          effects though (no crossing null)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic meta-analysis of RCTs with plausible study names and years
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized mean differences in reasonable range (-0.21 to -0.67);
+          weights sensible but one study crossing null would be more educational
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → chart layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html which is correct for altair
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered marks, tooltips, and declarative encoding; could
+          leverage more Altair-specific features like interactive selection or concatenation
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/bokeh.yaml b/plots/forest-basic/metadata/bokeh.yaml
index ce0ac21fcf..9ad014122d 100644
--- a/plots/forest-basic/metadata/bokeh.yaml
+++ b/plots/forest-basic/metadata/bokeh.yaml
@@ -25,3 +25,174 @@ review:
   - All studies favor treatment - missing a study crossing the null line would better
     demonstrate forest plot capabilities
   - Could add HoverTool for interactivity (a Bokeh strength)
+  image_description: The forest plot displays a meta-analysis of blood pressure reduction
+    trials with 11 individual studies plus a pooled estimate. Each study (Smith et
+    al. 2018 through Thomas et al. 2023) is represented by a blue circle (point estimate)
+    with horizontal whiskers showing 95% confidence intervals and vertical end caps.
+    Marker sizes vary proportionally to study weight. The pooled estimate at the bottom
+    is shown as a distinctive yellow diamond. A vertical dashed gray line marks the
+    null effect at 0. The x-axis shows "Mean Difference in Blood Pressure (mmHg)"
+    ranging from -12 to 4. "← Favors Treatment" and "Favors Control →" labels appear
+    at the bottom. All effect sizes are negative, indicating treatment benefit. The
+    title "forest-basic · bokeh · pyplots.ai" appears at top left.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, study names all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, studies well-spaced vertically
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers well-sized proportional to weights, CI lines clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) for studies, yellow (#FFD43B) for pooled - colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but significant whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Mean Difference in Blood Pressure (mmHg)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but grid could be more subtle (alpha 0.3 is acceptable
+          but visible)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot with point estimates and CIs
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Effect sizes on X, studies on Y - correct mapping
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diamond for pooled, reference line at null, weighted markers, CI
+          whiskers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Study labels accurate and readable
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "forest-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety of effect sizes and CI widths, but all favor treatment
+          (could show one borderline)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Blood pressure reduction trials - excellent realistic medical meta-analysis
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes (-2.8 to -6.2 mmHg) realistic for BP trials, though
+          slightly narrow range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → figure → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, Span, Label, patch for diamond. Could
+          leverage more Bokeh-specific features like HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/highcharts.yaml b/plots/forest-basic/metadata/highcharts.yaml
index f8a89af8ad..d4680982b0 100644
--- a/plots/forest-basic/metadata/highcharts.yaml
+++ b/plots/forest-basic/metadata/highcharts.yaml
@@ -26,3 +26,180 @@ review:
   - Grid/legend score affected by disabled legend (though appropriate for this plot
     type)
   - Code structure has multiple series loops which adds complexity
+  image_description: The plot displays a forest plot for meta-analysis with 10 studies
+    plus a pooled estimate. Each study is labeled on the left (Smith et al. 2018 through
+    Anderson et al. 2023) with blue square markers representing effect sizes and horizontal
+    lines showing 95% confidence intervals. The pooled estimate at the bottom is shown
+    as a yellow diamond with a blue border. A vertical dashed line at x=0 marks "No
+    Effect" with a label at the top. The x-axis shows "Mean Difference (95% CI)" ranging
+    from -1.2 to 0.8. The title reads "forest-basic · highcharts · pyplots.ai" with
+    a subtitle "Meta-Analysis of Treatment Effect on Primary Outcome". The background
+    is white with very subtle dotted grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, labels, and axis text are appropriately
+          sized for 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; study labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Square markers are appropriately sized with weight-proportional scaling;
+          CI lines are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe; no red-green
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight excess whitespace between last study
+          and pooled estimate but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'X-axis has descriptive label with units: "Mean Difference (95% CI)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (dotted, light gray), but legend is disabled which
+          is appropriate for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot implementation with effect sizes and confidence
+          intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Studies on Y-axis, effect sizes on X-axis, CI whiskers correctly
+          placed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diamond for pooled estimate, vertical
+          reference line at null effect (0), marker size proportional to weight, clear
+          labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (-1.2 to 0.8) shows all data points and CIs completely
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; study labels serve this purpose effectively
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "forest-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows studies with varying effect sizes (positive and negative),
+          different CI widths, and weight variation; could show more extreme heterogeneity
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible meta-analysis scenario of treatment effect studies with
+          realistic author names and years
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Mean differences are realistic for clinical trials; pooled estimate
+          CI appropriately narrower than individual studies
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Linear structure but slightly complex with multiple series creation
+          loops
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed as data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts scatter/line series, tooltips with custom data, plotLines
+          for reference; could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/letsplot.yaml b/plots/forest-basic/metadata/letsplot.yaml
index 8b9615841f..ff523f41d0 100644
--- a/plots/forest-basic/metadata/letsplot.yaml
+++ b/plots/forest-basic/metadata/letsplot.yaml
@@ -29,3 +29,182 @@ review:
     but might benefit from subtle horizontal grid lines for readability
   - The plot could benefit from lets-plot interactive features like tooltips showing
     exact values on hover
+  image_description: 'The forest plot displays 10 clinical trials (Johnson 2019 through
+    Taylor 2022) arranged vertically and ordered by effect size from lowest to highest.
+    Each study is represented by a blue square marker (point estimate) with horizontal
+    blue lines extending to show 95% confidence intervals. A dashed gray vertical
+    line at x=0 marks the null effect. At the bottom, a yellow diamond indicates the
+    pooled estimate with the label "Pooled: 0.31 [0.15, 0.46]" shown beneath it. The
+    title "forest-basic · letsplot · pyplots.ai" appears at the top. The x-axis is
+    labeled "Log Odds Ratio (95% CI)" and ranges from -0.5 to 1.0. Study names appear
+    on the left y-axis. The overall color scheme uses blue (#306998) for study data
+    and yellow (#FFD43B) for the pooled estimate diamond.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and prominent, study labels are clearly readable, axis
+          labels and tick marks are appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; study labels are well-spaced vertically
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and whiskers are clearly visible; marker sizes vary by weight
+          as specified, though the size difference could be more pronounced
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout, though there's some unused space on the right
+          side of the plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'X-axis has descriptive label with units: "Log Odds Ratio (95% CI)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but the grid could be more subtle (currently minimal
+          vertical grid lines which is appropriate for forest plots)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot implementation with point estimates, confidence
+          intervals, and pooled diamond
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Effect sizes on x-axis, studies on y-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: vertical null reference line at 0, diamond
+          for pooled estimate, whiskers for CIs, marker size proportional to weight'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis appropriately shows full range of data including all confidence
+          intervals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present (appropriate for this plot type, but marker sizing
+          by weight isn't labeled)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "forest-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in effect sizes (both positive and negative), varying
+          CI widths, and different weights; one study crosses null, demonstrating
+          heterogeneity
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic meta-analysis scenario with plausible study names, years,
+          and log odds ratios
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Log odds ratios in realistic range (-0.45 to 1.02), weights sum to
+          ~100%
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements), but no seed comment for
+          clarity
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and lets_plot used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (geom_segment, geom_point, geom_polygon,
+          geom_text), theme customization, and scale_size_identity; however, doesn't
+          leverage lets-plot specific interactive features or tooltips
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/matplotlib.yaml b/plots/forest-basic/metadata/matplotlib.yaml
index 7233967126..cc54c8e83b 100644
--- a/plots/forest-basic/metadata/matplotlib.yaml
+++ b/plots/forest-basic/metadata/matplotlib.yaml
@@ -24,3 +24,177 @@ review:
   - Missing legend explaining that marker size represents study weight
   - All effect sizes favor treatment (none crossing the null line to favor control
     would make data more illustrative)
+  image_description: 'The plot displays a forest plot for meta-analysis of 10 studies.
+    Each study (Johnson 2018 through Taylor 2023) is shown on the y-axis with horizontal
+    blue lines representing confidence intervals and blue circular markers of varying
+    sizes (proportional to study weight) indicating point estimates. A vertical dashed
+    gray reference line at x=0 marks the null effect. At the bottom, a yellow diamond
+    shows the pooled estimate. The x-axis is labeled "Standardized Mean Difference
+    (95% CI)" with annotations "← Favors Treatment" on the left and "Favors Control
+    →" on the right. The title follows the required format: "forest-basic · matplotlib
+    · pyplots.ai". Colors used are blue (#306998) for study markers/CIs and yellow
+    (#FFD43B) for the pooled estimate diamond.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, xlabel at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers well-sized (80-300 based on weight), CI lines clearly visible
+          with linewidth=3
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/gray scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight asymmetry with y-axis labels on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Standardized Mean Difference
+          (95% CI)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate (alpha=0.3, x-axis only), but no legend for marker
+          size meaning
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot with effect sizes, CIs, and pooled estimate
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Studies on y-axis, effect sizes on x-axis with proper CI whiskers
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diamond for pooled estimate, reference line at null, marker size
+          proportional to weight
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis labels accurate, includes "Pooled Estimate"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "forest-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation in effect sizes, CI widths, and weights; includes
+          studies with CIs crossing zero
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Meta-analysis of RCTs with SMD values in realistic range (-0.21 to
+          -0.67)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values are realistic, but all studies favor treatment (none favor
+          control), which is slightly less illustrative
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random seed needed), but could benefit
+          from explicit comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.pyplot, matplotlib.patches, numpy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of Polygon patch for diamond, but could leverage errorbar
+          or other specialized features
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/plotly.yaml b/plots/forest-basic/metadata/plotly.yaml
index 4935236e25..df22a7a908 100644
--- a/plots/forest-basic/metadata/plotly.yaml
+++ b/plots/forest-basic/metadata/plotly.yaml
@@ -25,3 +25,175 @@ review:
   - Studies are not ordered by effect size or chronologically as suggested in spec
     - they appear in arbitrary order
   - The No Effect annotation placement could be slightly more prominent
+  image_description: The plot displays a meta-analysis forest plot with 12 studies
+    arranged vertically on the left side (Smith et al. 2018 through Robinson et al.
+    2024), with "Pooled" at the bottom. Each study is represented by a blue square
+    marker (varying sizes based on study weight) with horizontal blue lines extending
+    left and right showing confidence intervals. The pooled estimate at the bottom
+    is shown as a yellow/gold diamond shape. A vertical dashed gray line at x=0 represents
+    the "No Effect" reference line, labeled at the top. The x-axis shows "Mean Difference
+    in Blood Pressure (mmHg)" ranging from -20 to 5. Annotations at the bottom indicate
+    "← Favors Treatment" on the left and "Favors Control →" on the right. The title
+    "forest-basic · plotly · pyplots.ai" is centered at the top. The background is
+    white with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title at 28pt, axis labels at 22pt, tick
+          labels at 18pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, study names well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Square markers appropriately sized (8-24 range), CI lines clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, adequate left margin for study names
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Clear x-axis label with units: "Mean Difference in Blood Pressure
+          (mmHg)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed, but grid could be more subtle (currently acceptable)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot with studies, CIs, and pooled estimate
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Effect sizes on x-axis, studies on y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has all key features: square markers, CI whiskers, diamond pooled
+          estimate, reference line; studies not sorted by effect size as spec suggests'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range [-20, 5] shows all data points and CIs
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; study labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "forest-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variety: different effect sizes, varying CI widths, mix of
+          more/less precise studies'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Blood pressure reduction meta-analysis is realistic and commonly
+          used
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: mmHg reductions of 3.5-12.3 are realistic for BP intervention trials
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions (4800x2700 via scale=3)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of hover tooltips with custom templates, interactive HTML
+          output, add_vline for reference line
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/plotnine.yaml b/plots/forest-basic/metadata/plotnine.yaml
index b0b5959669..e9be68fc17 100644
--- a/plots/forest-basic/metadata/plotnine.yaml
+++ b/plots/forest-basic/metadata/plotnine.yaml
@@ -25,3 +25,180 @@ review:
     better demonstrate the plot ability to show heterogeneous results
   - Grid lines only on x-axis (though appropriate for forest plots, full grid could
     be considered)
+  image_description: 'The plot displays a forest plot with 10 individual studies listed
+    vertically on the left side (Smith 2018 through Anderson 2023), each with a blue
+    horizontal error bar representing the confidence interval and a blue dot at the
+    point estimate. A vertical dashed gray line at x=0 marks the null effect reference.
+    At the bottom, a yellow diamond shape represents the pooled estimate. On the right
+    side, effect sizes with confidence intervals are displayed in text format (e.g.,
+    "-0.45 [-0.72, -0.18]"). The pooled estimate is shown in bold ("-0.31 [-0.46,
+    -0.15]"). The title follows the required format: "forest-basic · plotnine · pyplots.ai".
+    The x-axis is labeled "Mean Difference (Treatment - Control)". The background
+    is white with subtle light gray vertical grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, study names and effect values
+          clearly readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text elements are well-separated with no overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points and error bars well-sized; marker sizes vary by weight as
+          specified; slightly conservative sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe; high contrast
+          against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight imbalance with more whitespace on
+          the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label "Mean Difference (Treatment - Control)"
+          with context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed; grid is subtle but only vertical lines present
+          which is appropriate for forest plots
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot with study estimates, CIs, and pooled diamond
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Effect sizes on x-axis, studies on y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: point estimates, CI whiskers, diamond
+          for pooled estimate, reference line at null, marker size by weight'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type; labels are inline
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "forest-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both significant (CI not crossing 0) and non-significant results;
+          varying CI widths; pooled estimate clearly shown. Could have more variation
+          in effect directions.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Meta-analysis of RCTs comparing treatment vs control is a perfect
+          real-world scenario for forest plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes and CIs are realistic for clinical trials; all effects
+          are negative which is slightly monotonic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses plotnine's grammar of graphics
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_errorbarh, geom_point with size identity, geom_polygon
+          for diamond, geom_text for labels. Uses ggplot properly but doesn't showcase
+          any particularly distinctive plotnine features beyond standard ggplot grammar.
+  verdict: APPROVED
diff --git a/plots/forest-basic/metadata/seaborn.yaml b/plots/forest-basic/metadata/seaborn.yaml
index 243ae9ac16..0aa36b279b 100644
--- a/plots/forest-basic/metadata/seaborn.yaml
+++ b/plots/forest-basic/metadata/seaborn.yaml
@@ -25,3 +25,179 @@ review:
   - sns.set_style called after plt.subplots (should be called before for idiomatic
     seaborn usage)
   - Layout could be slightly tighter on the right side to reduce whitespace
+  image_description: The forest plot displays a meta-analysis of 10 clinical studies
+    showing treatment effect (mean difference) with horizontal confidence intervals.
+    The plot uses a blue color scheme (#306998) for study markers and CI lines. Each
+    study is represented with a circular marker whose size varies proportionally to
+    study weight. Study names are listed on the left (e.g., "Johnson et al. 2019",
+    "Brown et al. 2020"), while effect sizes with 95% CIs are shown on the right in
+    monospace font (e.g., "-0.52 [-0.77, -0.27]"). A dashed vertical reference line
+    at x=0 indicates the null effect. At the bottom, a yellow diamond shows the pooled
+    estimate (-0.31 [-0.37, -0.24]). The title "forest-basic · seaborn · pyplots.ai"
+    is prominently displayed at the top. Directional annotations "← Favors Treatment"
+    and "Favors Control →" appear at the top of the plot area.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text perfectly readable: title at 24pt, axis labels at 20pt,
+          study names at 14pt, tick labels at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; study labels, CIs, and effect sizes are
+          well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker sizes appropriately scaled by weight, CI lines clearly visible
+          with good linewidth
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some unused space on the right side could
+          be reduced
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive x-axis label "Mean Difference (Treatment - Control)"
+          with parenthetical context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct forest plot with point estimates and confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Effect sizes on x-axis, studies on y-axis with proper ordering
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required: diamond for pooled estimate, vertical reference line,
+          weighted markers, CI whiskers'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; directional annotations serve informational purpose
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "forest-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows studies favoring treatment AND control, various CI widths,
+          weighted markers, pooled estimate
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic meta-analysis scenario with plausible study names and chronological
+          years
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes (-0.52 to +0.12) and CI widths are realistic for mean
+          differences
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.patches, plt, np, pd, sns)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: sns.set_style called after figure creation (order doesn't affect
+          output but not idiomatic)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot with size mapping and sns.despine, but CI lines
+          use matplotlib directly
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/altair.yaml b/plots/funnel-basic/metadata/altair.yaml
index 684c5e419a..85284c659e 100644
--- a/plots/funnel-basic/metadata/altair.yaml
+++ b/plots/funnel-basic/metadata/altair.yaml
@@ -22,3 +22,176 @@ review:
   - Could benefit from tooltips for enhanced interactivity in the HTML version
   - Middle blue stages (Interest, Consideration, Intent) have subtle color differences
     that could be more distinct
+  image_description: 'The plot displays a basic funnel chart with 5 stages arranged
+    vertically from top to bottom: Awareness (dark blue, 1000), Interest (medium blue,
+    600), Consideration (lighter blue, 400), Intent (light blue, 200), and Purchase
+    (yellow, 100). Each stage is represented as a horizontally centered bar with rounded
+    corners. The bars progressively narrow from top to bottom, creating the classic
+    funnel shape. Stage labels appear on the left y-axis in bold text, while value
+    labels with percentages (e.g., "1000 (100.0%)", "600 (60.0%)") are positioned
+    to the right of each bar. The title "funnel-basic · altair · pyplots.ai" is centered
+    at the top. The color scheme transitions from Python blue (#306998) to yellow
+    (#FFD43B), providing good visual differentiation between stages.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, stage labels at 20pt bold, value labels at 18pt bold
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar heights of 70px are well-sized, good spacing between stages
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-friendly, but subtle blue variations
+          between middle stages may be hard to distinguish
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, bars well-centered, though some empty space
+          on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No x-axis (hidden intentionally, appropriate), but y-axis has no
+          title (appropriate for this chart type) - N/A for funnel charts
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid disabled (appropriate), no legend needed as stages are labeled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart type using centered horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages correctly mapped to y-axis, values to bar width
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, value/percentage labels,
+          proportional widths'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, stages clearly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear progressive decrease (1000→600→400→200→100), demonstrates
+          drop-offs between stages
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel scenario with realistic stage names matching spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for a sales funnel (1000 leads → 100 purchases
+          = 10% conversion)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but technically no seed
+          set - data is hardcoded so this is fine (3/3)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All Altair APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative grammar well with mark_bar, encode, x/x2
+          for bar range. Could have used tooltips or interactivity for bonus points.
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/bokeh.yaml b/plots/funnel-basic/metadata/bokeh.yaml
index 6b18f18558..91aaf7466b 100644
--- a/plots/funnel-basic/metadata/bokeh.yaml
+++ b/plots/funnel-basic/metadata/bokeh.yaml
@@ -22,3 +22,161 @@ review:
   - 'Minor: Could improve canvas utilization by making funnel slightly larger to reduce
     bottom whitespace'
   - 'Minor: Color palette includes red-green combination (though not as sole differentiator)'
+  image_description: 'The plot displays a funnel chart with 5 trapezoidal segments
+    representing a sales funnel from top to bottom: Awareness (blue #306998, 1,000
+    - 100%), Interest (yellow #FFD43B, 600 - 60%), Consideration (red #E74C3C, 400
+    - 40%), Intent (purple #9B59B6, 200 - 20%), and Purchase (green #27AE60, 100 -
+    10%). Each segment contains centered white text (dark text on yellow) with the
+    stage name in bold and the value with percentage below. The funnel progressively
+    narrows from top to bottom. A legend on the right side lists all stages with their
+    values. The title "funnel-basic · bokeh · pyplots.ai" is centered at the top.
+    Axes and grid are hidden for a clean presentation.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable at 28-36pt sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: funnel segments well-sized and visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: distinct colors, slight deduction for red-green in palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good centered layout, minor whitespace at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: legend well-placed, grid appropriately hidden
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct funnel with trapezoidal segments
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: stages/values correctly mapped, width proportional
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: distinct colors, labels with percentages, proper narrowing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend labels accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correct format "funnel-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows progressive decrease through all stages
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: realistic sales funnel example from spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: sensible conversion rates (1000→100)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic data, no random values
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: patch() for custom shapes, Label model, Legend with LegendItem, HTML
+          export
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/highcharts.yaml b/plots/funnel-basic/metadata/highcharts.yaml
index 2877fa6b6e..8d5a9cd4b5 100644
--- a/plots/funnel-basic/metadata/highcharts.yaml
+++ b/plots/funnel-basic/metadata/highcharts.yaml
@@ -29,3 +29,170 @@ review:
     for better balance
   - VQ-07 could be improved with subtle background styling or stage separators
   - Library features score could be higher by adding hover tooltips or animation effects
+  image_description: |-
+    The plot displays a funnel chart with 5 sequential stages of a sales funnel. The title "funnel-basic · highcharts · pyplots.ai" appears at the top in bold black text. The funnel narrows from top to bottom with distinct colors for each stage:
+    - **Awareness** (dark blue #306998): Widest segment at top, labeled "Awareness: 1,000 (43.5%)"
+    - **Interest** (yellow #FFD43B): Second segment, labeled "Interest: 600 (26.1%)"
+    - **Consideration** (purple #9467BD): Middle segment, labeled "Consideration: 400 (17.4%)"
+    - **Intent** (cyan #17BECF): Narrow segment, labeled "Intent: 200 (8.7%)"
+    - **Purchase** (brown #8C564B): Narrowest at bottom, labeled "Purchase: 100 (4.3%)"
+
+    Data labels are positioned to the right of each segment with connector lines. The funnel shape is well-proportioned with a neck at the bottom. White background with good canvas utilization.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and data labels are clearly readable at 72px and 36px respectively,
+          though labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels positioned cleanly to the right with
+          connectors
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Funnel segments are well-sized and clearly visible with good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette, no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization (~60%), slight asymmetry with labels all
+          on right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend disabled, which is appropriate; no grid context is needed
+          for a funnel
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart type with narrowing segments
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages and values correctly mapped to funnel segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, value labels, percentage
+          labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 stages visible from 1000 to 100
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled but segment labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows progressive decrease through all stages; could show more dramatic
+          drop-offs to emphasize conversion losses
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel is a perfect real-world scenario with plausible stage
+          names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable (1000→100), though conversion rates are somewhat
+          optimistic for real sales
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic, so no random seed is needed; slight deduction
+          for hardcoded values without an explanatory comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses FunnelSeries, data labels with connectors, neck configuration;
+          could leverage more Highcharts-specific features like tooltips or animations
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/letsplot.yaml b/plots/funnel-basic/metadata/letsplot.yaml
index b3539f4a2c..bd82a2bcce 100644
--- a/plots/funnel-basic/metadata/letsplot.yaml
+++ b/plots/funnel-basic/metadata/letsplot.yaml
@@ -23,3 +23,180 @@ review:
   - Could add np.random.seed(42) comment for reproducibility documentation even though
     data is deterministic
   - HTML output generated but not required by spec (minor)
+  image_description: 'The plot displays a well-designed funnel chart with 5 trapezoidal
+    segments representing sales pipeline stages: Awareness (1,000 - 100%), Interest
+    (600 - 60%), Consideration (400 - 40%), Intent (200 - 20%), and Purchase (100
+    - 10%). The segments narrow progressively from top to bottom. Colors used are:
+    dark blue (#306998) for Awareness, light blue (#4A90D9) for Interest, yellow (#FFD43B)
+    for Consideration, orange (#F5A623) for Intent, and dark orange/brown (#D45D00)
+    for Purchase. Each segment contains bold white text showing the stage name, value
+    with comma formatting, and percentage. White borders (2px) separate the segments
+    cleanly. The title "funnel-basic · letsplot · pyplots.ai" appears centered at
+    the top in bold. The chart uses theme_void() for a clean background with no axes
+    or grid.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text is bold, white, and perfectly readable against the colored
+          backgrounds. Title is appropriately sized.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; each label is well-centered within its trapezoid
+          segment.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Trapezoid segments are well-sized and clearly visible with good proportions.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color progression from blue to orange; distinct enough for colorblind
+          users, though blue/yellow adjacent could be slightly better differentiated.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Funnel is well-centered and fills the canvas appropriately with balanced
+          margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for funnel charts (theme_void removes axes intentionally).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for funnel), legend correctly hidden since labels
+          are on segments.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart with trapezoidal segments narrowing top to bottom.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stage names and values correctly mapped to segments.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors per stage, value/percentage
+          labels, proportional widths.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 stages visible and properly ordered.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels on segments serve as legend; correctly formatted.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows progressive decrease across stages, demonstrating funnel conversion
+          concept perfectly.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel is a classic, realistic use case with sensible stage
+          names.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (1000→600→400→200→100) are realistic for a sales funnel scenario.
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → polygon building → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Deterministic data (no random), but no explicit seed statement. Data
+          is hardcoded so reproducible, partial credit.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (pandas, lets_plot, export).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves 'plot.png' using export_ggsave with path='.', which is correct;
+          however, it also saves plot.html, which wasn't strictly required.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_polygon for custom shapes, scale_y_reverse, theme_void,
+          ggsize - good lets-plot usage but could use more advanced features like
+          tooltips or annotations.
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/matplotlib.yaml b/plots/funnel-basic/metadata/matplotlib.yaml
index 940a7fd215..b938c0133e 100644
--- a/plots/funnel-basic/metadata/matplotlib.yaml
+++ b/plots/funnel-basic/metadata/matplotlib.yaml
@@ -26,3 +26,174 @@ review:
     distinctive features that could simplify the code
   - Color palette uses similar blue shades for first two stages which reduces visual
     distinction
+  image_description: 'The plot displays a funnel chart with 5 stages arranged vertically
+    from top to bottom. The stages are: Awareness (1,000 - 100%), Interest (600 -
+    60%), Consideration (400 - 40%), Intent (200 - 20%), and Purchase (100 - 10%).
+    Each stage is rendered as a trapezoid segment that narrows progressively. The
+    color scheme transitions from dark blue (#306998) at the top through lighter blue
+    (#4A8BBF), yellow (#FFD43B), orange (#FFB347), to coral red (#FF6B6B) at the bottom.
+    White edge lines separate each segment. Text labels are centered within each segment
+    showing the stage name, count with comma formatting, and percentage. The top two
+    stages use white text while the bottom three use black text for contrast. The
+    title "funnel-basic · matplotlib · pyplots.ai" appears at the top in bold black
+    text. The background is white.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text is clearly readable with appropriate font sizes (18pt for
+          labels, 24pt for title)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels centered within their segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Trapezoid segments are well-sized and clearly visible with good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Distinct colors for each stage, though the blue shades are similar;
+          not purely red-green dependent
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, funnel is centered
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for funnel chart (axis is off), but labels on segments compensate
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed for funnel chart, no legend needed as labels are embedded
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart with trapezoid segments narrowing from top to
+          bottom
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages and values correctly mapped to segment widths and labels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, value/percentage labels,
+          proportional widths'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 stages visible with appropriate proportions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (embedded labels serve this purpose effectively)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows progressive decrease across 5 stages; could show more dramatic
+          variation between stages
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel scenario from spec is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (1000 → 100) are realistic for a sales funnel
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.patches and matplotlib.pyplot are imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic Polygon patches; could leverage matplotlib's more advanced
+          features
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/plotly.yaml b/plots/funnel-basic/metadata/plotly.yaml
index 03b756ac09..109b795d86 100644
--- a/plots/funnel-basic/metadata/plotly.yaml
+++ b/plots/funnel-basic/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - The blue color gradient (4 shades of blue + yellow) could be improved for better
     accessibility - consider using a more varied colorblind-safe palette
+  image_description: 'The plot displays a vertical funnel chart with 5 stages: Awareness
+    (dark blue, #306998), Interest (steel blue, #4682B4), Consideration (teal, #5F9EA0),
+    Intent (light blue, #6CA6CD), and Purchase (yellow, #FFD43B). Each trapezoidal
+    segment shows both count and percentage of initial (1000/100%, 600/60%, 400/40%,
+    200/20%, 100/10%). Stage names appear on the left y-axis, with "Stage" as the
+    axis label. Gray connector lines separate segments. The title "funnel-basic ·
+    plotly · pyplots.ai" is centered at the top. The funnel progressively narrows
+    from top to bottom, clearly illustrating the conversion drop-off at each stage.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt, segment
+          text at 20pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and values clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Funnel segments are well-sized and proportional; clear visual hierarchy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but the blue variations may be challenging
+          for some colorblind viewers; yellow provides good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization; funnel fills ~60% of canvas with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Stage" label (descriptive) but no units needed; X-axis
+          has "Count" but is not prominently visible in funnel view
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed for this chart type; no grid visible which is appropriate
+          for funnel charts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart type with trapezoidal segments
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages correctly mapped to y-axis, values to segment widths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors per stage, value/percentage
+          labels, proportional widths'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 stages visible with full value range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; stage names serve as labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows progressive decrease across all stages with clear drop-offs
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel scenario with realistic stage names from spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (1000→100) are realistic for a sales funnel conversion
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data); the data is hardcoded,
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported, which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Funnel which is Plotly's native funnel implementation; also
+          generates interactive HTML output
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/plotnine.yaml b/plots/funnel-basic/metadata/plotnine.yaml
index fcd89193c7..2fbda38408 100644
--- a/plots/funnel-basic/metadata/plotnine.yaml
+++ b/plots/funnel-basic/metadata/plotnine.yaml
@@ -24,3 +24,171 @@ review:
     necessary given file naming constraints)
   - Uses rectangular bars rather than true trapezoids, so it is technically a bar
     funnel rather than a classic funnel shape
+  image_description: 'The plot displays a funnel chart with 5 horizontal rectangular
+    bars stacked vertically, representing a sales funnel. The bars progressively narrow
+    from top to bottom: "Awareness" (widest, dark blue #306998, showing 1,000/100%),
+    "Interest" (medium blue, 600/60%), "Consideration" (lighter blue, 400/40%), "Intent"
+    (even lighter, 200/20%), and "Purchase" (lightest blue, smallest bar, 100/10%).
+    Each bar has white text centered showing the stage name, value with comma formatting,
+    and percentage. The title "funnel-basic · plotnine · pyplots.ai" appears at the
+    top in bold black text. The background is white with no axes, grid, or legend
+    visible. The chart uses a blue gradient color scheme progressing from darker to
+    lighter shades.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is bold and readable at 24pt, labels on bars are readable but
+          slightly small for the canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fit within their respective bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible, good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue gradient is colorblind-safe, good contrast with white text
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though some empty space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for funnel charts (no axes needed), appropriately removed
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend disabled which is appropriate, but no grid needed anyway
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart visualization with narrowing bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages correctly ordered top-to-bottom, widths proportional to values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, value/percentage labels,
+          proportional widths'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 stages visible and properly scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden (labels are on bars)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear progression with decreasing values at each stage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel scenario is realistic and commonly used
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (1000→600→400→200→100) are realistic conversion rates
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but sys.path manipulation
+          is a code smell
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_rect, aes, theme_minimal), but funnel is
+          simulated with rectangles rather than using any special plotnine feature
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/pygal.yaml b/plots/funnel-basic/metadata/pygal.yaml
index 8f408a23ca..3d19f82559 100644
--- a/plots/funnel-basic/metadata/pygal.yaml
+++ b/plots/funnel-basic/metadata/pygal.yaml
@@ -25,3 +25,165 @@ review:
     imbalance
   - Horizontal funnel orientation differs from spec description of top to bottom narrowing
     (pygal library design choice)
+  image_description: 'The plot displays a horizontal funnel chart with 5 stages representing
+    a sales conversion funnel. From left to right: Awareness (blue, 1,000), Interest
+    (yellow, 600), Consideration (teal, 400), Intent (peach, 200), and Purchase (olive
+    green, 100). Each segment is trapezoidal and narrows progressively toward the
+    right, forming a pointed funnel shape. The title "funnel-basic · pygal · pyplots.ai"
+    appears at the top center. A legend at the bottom identifies each stage with colored
+    markers. The values are displayed as formatted numbers (with comma separators)
+    on each segment.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title and value labels are clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Funnel segments are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors that work well for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: false
+        comment: Large whitespace gap between funnel and legend at bottom; funnel
+          is vertically centered but legend appears isolated
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is present and accurate, no grid needed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: false
+        comment: Values displayed but percentages from formatter not showing; horizontal
+          orientation differs from spec's "top to bottom" (library limitation)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 5 stages correctly labeled in legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear progressive decrease across all 5 stages
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales funnel is a classic, realistic use case matching spec example
+          exactly
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic (1000 → 100 with reasonable drop-offs)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean linear structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no randomness needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using strict=True in zip is fine but print_values/value_formatter
+          may not be rendering as expected
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses pygal.Funnel with custom Style, legend_at_bottom, value formatting;
+          however the value_formatter doesn't appear to be rendering the percentage
+          portion
+  verdict: APPROVED
diff --git a/plots/funnel-basic/metadata/seaborn.yaml b/plots/funnel-basic/metadata/seaborn.yaml
index fe37169622..eb6a5d48d1 100644
--- a/plots/funnel-basic/metadata/seaborn.yaml
+++ b/plots/funnel-basic/metadata/seaborn.yaml
@@ -24,3 +24,157 @@ review:
     output - may be positioned outside canvas bounds
   - Could use seaborn color palette more distinctively (e.g., using a named sequential
     palette)
+  image_description: 'The plot displays a funnel chart for a sales pipeline with 5
+    stages arranged vertically from top to bottom: Awareness (1,000 / 100%), Interest
+    (600 / 60%), Consideration (400 / 40%), Intent (200 / 20%), and Purchase (100
+    / 10%). Each stage is rendered as a trapezoidal segment with widths proportional
+    to their values. The color palette transitions from deep blue (#306998) at the
+    top through lighter blues to gold/yellow (#FFD43B, #E8C547) at the bottom. Stage
+    names appear on the left side in bold dark gray text, while values with percentages
+    are centered within each segment in white (for blue segments) or dark text (for
+    gold segments). The title "funnel-basic · seaborn · pyplots.ai" appears at the
+    top. The layout is clean with a white background and good spacing between segments.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, stage names at 20pt, values at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Trapezoid segments are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-to-gold gradient is colorblind-friendly, though the gold shades
+          are similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, funnel is well-centered, though conversion rate
+          annotations on the right are missing from the rendered output
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct funnel chart with trapezoidal segments narrowing top to bottom
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Stages correctly ordered from largest to smallest
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has value labels, percentages, distinct colors; conversion rates
+          coded but not visible in output
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 5 stages displayed with correct proportions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "funnel-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows progressive decrease across all stages, demonstrates funnel
+          drop-off pattern well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses the exact sales funnel example from the specification
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (1000→100) are realistic for a conversion funnel
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: numpy imported but seed not strictly necessary for deterministic
+          data
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib/seaborn APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn styling
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_theme() and sns.color_palette(), but core drawing uses
+          matplotlib Polygon; this is acceptable since seaborn doesn't have a native
+          funnel chart
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/altair.yaml b/plots/gain-curve/metadata/altair.yaml
index 26b486ccd5..5c5f988326 100644
--- a/plots/gain-curve/metadata/altair.yaml
+++ b/plots/gain-curve/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
     moving to a less intrusive location
   - The axis labels include (%) in the title text but this could be formatted more
     consistently
+  image_description: The plot shows a cumulative gains chart with a 16:9 aspect ratio.
+    The title "gain-curve · altair · pyplots.ai" appears at the top center in a dark
+    font. The main model curve is displayed as a solid blue line (#306998) that starts
+    at the origin (0,0) and curves upward above the diagonal, reaching 100% at the
+    right edge. A light blue shaded area fills under the model curve, providing visual
+    emphasis. The diagonal dashed gray line represents the "Random (Baseline)" reference.
+    The x-axis is labeled "Population Targeted (%)" ranging 0-100, and the y-axis
+    is labeled "Positive Cases Captured (%)" ranging 0-100. A custom legend in the
+    upper-left area shows "Model" (solid blue line) and "Random (Baseline)" (dashed
+    gray line). The grid is subtle with light dashed lines. Text sizes are appropriate
+    and readable.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 28pt, axis labels are 22pt, tick labels are 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line thickness of 4 is appropriate, curves are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue vs gray provides excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units (% is shown but not in label
+          format "Population Targeted (%)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (good), but custom legend placement could be improved
+          - it's positioned on top of the data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows population percentage, Y-axis shows cumulative gain
+          percentage
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal baseline reference line as required
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show 0-100% range correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Model" and "Random (Baseline)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows "{spec-id} · {library} · pyplots.ai" format exactly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows model with good discrimination above baseline, demonstrates
+          the key property of gains curves
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response prediction scenario is plausible and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are reasonable but the model's lift over baseline is modest;
+          a steeper initial curve would better demonstrate the value proposition
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (correct for Altair)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of Altair's layered composition, declarative encoding, monotone
+          interpolation, custom legend via mark layers
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/bokeh.yaml b/plots/gain-curve/metadata/bokeh.yaml
index d5727b1e4b..c00a39638c 100644
--- a/plots/gain-curve/metadata/bokeh.yaml
+++ b/plots/gain-curve/metadata/bokeh.yaml
@@ -24,3 +24,176 @@ review:
     strength)
   - Legend border line is slightly heavy at 2px width
   - Text sizes could be increased slightly for better readability at full resolution
+  image_description: 'The plot displays a cumulative gains chart on a light gray background
+    (#fafafa). The title "gain-curve · bokeh · pyplots.ai" appears in the top-left
+    corner. The X-axis is labeled "Percentage of Population Targeted (%)" and spans
+    0-100, while the Y-axis is labeled "Percentage of Positive Cases Captured (%)"
+    spanning 0-105. Three curves are shown: (1) a solid blue line (#306998) representing
+    the model gain curve with a steep initial slope that rises quickly from 0 to ~90%
+    captured within the first 15% of population, then gradually approaches 100%; (2)
+    a gray dashed diagonal baseline from (0,0) to (100,100) representing random selection;
+    (3) a yellow/gold dotted line showing the perfect model that rises vertically
+    to 100% at ~15% population, then stays horizontal. The legend is positioned on
+    the right side with clear labels. Subtle dashed grid lines are visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable at 28pt/22pt/18pt
+          respectively; slightly small for the canvas but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are clearly visible with good width (line_width=4 for model,
+          3 for others); the curves are well distinguished
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, gray, and yellow are colorblind-safe and easily distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; legend placement on right is appropriate
+          but creates some empty space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (%)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3) but legend border is a bit heavy
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows population %, Y shows captured positives %
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes model curve, random baseline (diagonal), and perfect model
+          reference
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100% range displayed on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three curves
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "{spec-id} · {library} · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows steep initial slope indicating good model discrimination; perfect
+          model shows expected shape; baseline is correct diagonal
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model with 15% positive rate is a realistic marketing
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1000 samples produces smooth curve; beta distributions create realistic
+          score separation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both PNG and HTML which is correct for Bokeh
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource properly and Legend placement; could have added
+          HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/highcharts.yaml b/plots/gain-curve/metadata/highcharts.yaml
index 3bfe2afb43..03393d79f4 100644
--- a/plots/gain-curve/metadata/highcharts.yaml
+++ b/plots/gain-curve/metadata/highcharts.yaml
@@ -24,3 +24,179 @@ review:
     corner or outside the plot area
   - Could add annotations to highlight key insights (e.g., 50% of positives captured
     with 10% targeting)
+  image_description: The plot displays a cumulative gains chart with a blue-filled
+    area curve ("Model Gain") that rises steeply from the origin, capturing approximately
+    90% of positive cases by targeting just 20% of the population, and reaches 100%
+    around 50% population. A yellow dashed diagonal line ("Random Selection") represents
+    the baseline random model. The title "gain-curve · highcharts · pyplots.ai" appears
+    at the top with subtitle "Customer Response Model Performance". X-axis shows "Population
+    Targeted (%)" from 0-100, Y-axis shows "Positive Cases Captured (%)" from 0-100.
+    Both axes have 10-unit tick intervals with subtle dashed grid lines. A white-background
+    legend box positioned in the right portion of the chart shows both series. The
+    area fill uses a gradient from solid blue (#306998) at top to light transparent
+    blue at bottom.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          4800x2700 resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: The gain curve line and area fill are clearly visible, baseline dashed
+          line is distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow colors are colorblind-safe and provide good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas, though legend placement in the middle-right overlaps
+          with the curve area slightly
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Population Targeted (%)", "Positive
+          Cases Captured (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend position overlaps the data area; could be moved to top-left
+          or outside
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows population percentage, Y shows cumulative positive cases
+          captured
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes gain curve AND diagonal reference line for random selection
+          as required by spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes properly show 0-100% range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Model Gain" and "Random Selection"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "gain-curve · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good model discrimination with steep initial slope; could show
+          more variation in the curve shape
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model is a realistic and neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 15% positive rate and 1000 samples are reasonable; beta distributions
+          create plausible score overlap
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → chart config → series → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of AreaSeries with gradient fill_color and SplineSeries
+          for smooth baseline; tooltip configuration present. Could leverage more
+          Highcharts-specific features like annotations or hover effects.
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/letsplot.yaml b/plots/gain-curve/metadata/letsplot.yaml
index fa524f15f3..98a8e7a264 100644
--- a/plots/gain-curve/metadata/letsplot.yaml
+++ b/plots/gain-curve/metadata/letsplot.yaml
@@ -26,3 +26,175 @@ review:
   - X-axis scale extends slightly beyond 100% which looks unintended
   - Model curve appears stepped/jagged due to discrete data points rather than smoothly
     interpolated
+  image_description: |-
+    The plot displays a cumulative gains chart with three lines on a white background with subtle gray grid lines:
+    - **Model curve (blue, #306998)**: A jagged/stepped blue line showing the model's cumulative gain, starting from origin and rising steeply initially before gradually flattening as it approaches 100%
+    - **Random baseline (gray, #888888)**: A straight diagonal line from (0,0) to (100,100) representing random selection
+    - **Perfect model (yellow, #FFD43B)**: Shows the theoretical perfect classifier - rises steeply vertically until reaching 100% gain at ~20% population, then remains horizontal at 100%
+
+    The X-axis shows "Population Targeted (%)" ranging from -5 to 100+, Y-axis shows "Positive Cases Captured (%)" from 0 to 100. Title reads "gain-curve · letsplot · pyplots.ai". Legend is positioned in the lower right area, clearly labeled "Curve" with entries for Model, Random, and Perfect.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are clearly visible with good thickness (size=1.5), though
+          the model line appears stepped/jagged rather than smooth
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, gray, and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, but X-axis extends to 100+ (showing "10" label beyond
+          100) which is slightly awkward
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Population Targeted (%)" and "Positive
+          Cases Captured (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines (theme_minimal removes them), legend placement
+          is good but grid is absent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative gains - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required: model curve, random baseline diagonal, perfect model
+          reference'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows full 0-100% range on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels "Model", "Random", "Perfect" are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "gain-curve · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows model performing better than random with clear lift, includes
+          perfect model comparison; model curve could show more pronounced initial
+          steepness
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model is a realistic marketing scenario per spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scale is appropriate; a positive rate of ~20% is reasonable for
+          response data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with proper theme customization and
+          ggsize; pd.melt for data reshaping is good practice, but could use more
+          distinctive lets-plot features
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/matplotlib.yaml b/plots/gain-curve/metadata/matplotlib.yaml
index 624b3ad02e..47496f38f0 100644
--- a/plots/gain-curve/metadata/matplotlib.yaml
+++ b/plots/gain-curve/metadata/matplotlib.yaml
@@ -25,3 +25,179 @@ review:
     through
   - Square aspect ratio (1:1) while appropriate for the 0-100% axes creates some unused
     corner space compared to 16:9
+  image_description: The plot displays a cumulative gains chart on a square canvas
+    with 1:1 aspect ratio. The main model curve is shown in blue (solid line, linewidth=3),
+    starting from origin (0,0) and curving upward to (100,100). A gray dashed diagonal
+    line represents the random baseline. A yellow/gold dotted line shows the perfect
+    model - rising steeply to 100% at approximately 50% of population, then horizontal.
+    A light blue shaded area fills the space between the model curve and the baseline,
+    visually highlighting the model's "lift" over random selection. An annotation
+    box in the lower portion reads "Top 20% captures 35% of positive cases" with an
+    arrow pointing to the corresponding point on the curve. The legend is positioned
+    in the lower right corner. All text is clearly legible with appropriate font sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt, legend 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, annotation well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are appropriately sized (linewidth=3 for model, 2 for others),
+          clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, gray, and yellow are colorblind-safe and have good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with equal aspect, though square format creates
+          some empty space in corners
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Population Targeted (%)" and "Positive Cases Captured (%)" are
+          descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid at alpha=0.3 is good, but legend could be positioned better
+          (lower right overlaps with data region)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative positives captured - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diagonal baseline, model curve, perfect model reference, annotation
+          showing key insight
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show 0-100% for both dimensions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify Model, Random (Baseline), Perfect
+          Model
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "gain-curve · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows model lift over baseline and comparison to perfect model; the
+          model demonstrates clear discrimination with steeper initial slope
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model is a real, neutral business scenario (marketing
+          campaign optimization)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are sensible; 35% capture at 20% targeting is realistic for
+          a decent model, though the lift could be more dramatic to better showcase
+          the plot type
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API (Axes methods)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses fill_between for shaded area, annotation with arrowprops, proper
+          Axes methods - good but not exceptional matplotlib features
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/plotly.yaml b/plots/gain-curve/metadata/plotly.yaml
index c2518ee56e..197c871f23 100644
--- a/plots/gain-curve/metadata/plotly.yaml
+++ b/plots/gain-curve/metadata/plotly.yaml
@@ -26,3 +26,179 @@ review:
     line
   - Could better leverage Plotly interactive features (hover tooltips with specific
     values, annotations for key thresholds)
+  image_description: 'The plot displays a cumulative gains chart with three lines
+    on a white background. The main model curve (solid dark blue, #306998) shows the
+    cumulative percentage of positives captured as population percentage increases,
+    with a light blue fill area between the model curve and the baseline. A dashed
+    gray diagonal line represents the random baseline (from 0,0 to 100,100). A dotted
+    yellow/gold line shows the perfect model - rising vertically to 100% at around
+    20% of population, then horizontal to 100%. The x-axis is labeled "Percentage
+    of Population Targeted (%)" (0-100), y-axis is "Percentage of Positives Captured
+    (%)" (0-100). Title "gain-curve · plotly · pyplots.ai" is centered at top. Legend
+    is positioned in lower-right corner with entries for Model, Perfect Model, and
+    Random (Baseline). Grid lines are subtle gray. The model curve demonstrates good
+    discrimination, rising steeply initially and capturing ~80% of positives by targeting
+    ~30% of population.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are well-sized (width 3-4), though model line could be slightly
+          thicker for better visibility against fill area
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray/yellow are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (%)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (good), but legend placement in lower-right corner
+          partially overlaps with the baseline diagonal where it crosses
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population %, Y=positives captured % - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal baseline (random), perfect model curve, and main
+          model curve as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show 0-100% range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels are accurate and descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "gain-curve · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows model discrimination well, includes perfect model reference;
+          could show slight variations in model quality to demonstrate different scenarios
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model evaluation is a realistic, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 20% positive rate and model performance are realistic; 1000 samples
+          produces slightly granular curve
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flow: imports → data → calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses fill="tonexty" for area fill which is nice, but doesn't leverage
+          Plotly's interactive features like hover tooltips with additional info (e.g.,
+          showing exact values on hover), annotations for key points, or range sliders
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/plotnine.yaml b/plots/gain-curve/metadata/plotnine.yaml
index e8c84476b7..c3dcda6f09 100644
--- a/plots/gain-curve/metadata/plotnine.yaml
+++ b/plots/gain-curve/metadata/plotnine.yaml
@@ -27,3 +27,183 @@ review:
     at line 27)
   - Could benefit from annotation showing key metrics like the area under the curve
     or lift at specific percentiles
+  image_description: The plot displays a cumulative gains chart with three distinct
+    curves on a white background. The blue line represents the "Model" curve showing
+    actual model performance, starting at origin (0,0) and rising steeply to capture
+    about 80% of positives by targeting 20% of the population, then leveling off to
+    reach 100% around 35% population. The yellow/gold line shows the "Perfect Model"
+    which rises vertically from 0 to 100% at approximately 17% population (the positive
+    class rate), then extends horizontally. The gray diagonal line represents the
+    "Random (Baseline)" going from (0,0) to (100,100). The title reads "gain-curve
+    · plotnine · pyplots.ai". X-axis is labeled "Population Targeted (%)" and Y-axis
+    is "Positive Cases Captured (%)", both ranging 0-100 with gridlines at 20-unit
+    intervals. Legend is positioned on the right side showing all three curve types.
+    All text is clearly legible with appropriate font sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 2.5 is excellent for visibility, all three curves clearly
+          distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998), gray (#888888), and yellow (#FFD43B) are colorblind-safe
+          with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with plot filling canvas well, legend positioned nicely
+          but could be slightly better integrated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Population Targeted (%)" and "Positive
+          Cases Captured (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle (alpha 0.2-0.3 is good), but legend title "Curve"
+          is generic
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart with model curve, perfect model, and
+          baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows population percentage, Y-axis shows cumulative
+          positive capture
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal baseline reference, perfect model line, and model
+          curve as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show full 0-100% range as appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three curves
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "gain-curve · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good model discrimination with steep initial slope, but perfect
+          model transition could be more emphasized
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Marketing response classification model is a perfect real-world application
+          for gain curves
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1000 samples with ~17% positive rate is realistic, though positive
+          rate is slightly low for marketing
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used, no unnecessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but variable n_samples is redefined mid-code
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_line, scale_color_manual, and
+          theme_minimal, but doesn't use advanced plotnine features like annotations
+          or stat layers
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/pygal.yaml b/plots/gain-curve/metadata/pygal.yaml
index 112f0b966d..9465531681 100644
--- a/plots/gain-curve/metadata/pygal.yaml
+++ b/plots/gain-curve/metadata/pygal.yaml
@@ -25,3 +25,170 @@ review:
     isolated
   - Could add a Perfect Model reference line (vertical to 100% at positive rate, then
     horizontal) as mentioned in spec notes
+  image_description: 'The plot displays a cumulative gains chart on a white background
+    at 4800×2700 pixels. The title "gain-curve · pygal · pyplots.ai" appears at the
+    top center in dark gray text. The X-axis is labeled "Population Targeted (%)"
+    ranging from 0 to 100, and the Y-axis is labeled "Cumulative Gains (%)" also ranging
+    from 0 to 100. Two lines are plotted: a blue "Model Gains" curve that shows the
+    cumulative gains achieved by the model (starting steep then flattening), and a
+    gray "Random Baseline" diagonal line representing random selection. The legend
+    appears in the top-left corner showing "Model Gains" (blue) and "Random Baseline"
+    (gray). Grid lines are visible with subtle gray coloring. The model curve clearly
+    shows better-than-random performance as it consistently stays above the diagonal
+    baseline.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable; legend text is slightly
+          small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are visible with good stroke width; could benefit from slightly
+          thicker lines
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue vs gray provides excellent contrast and is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Population Targeted (%)" and "Cumulative
+          Gains (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is positioned in top-left corner outside the plot area, somewhat
+          isolated; grid is appropriately subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY line chart for cumulative gains
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows population percentage, Y shows cumulative gains percentage
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes model curve, random baseline diagonal reference line
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes correctly show 0-100% range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Model Gains" and "Random Baseline"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "gain-curve · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows model gains curve with steeper initial slope demonstrating
+          good model discrimination; missing perfect model reference
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses sklearn make_classification for realistic binary classification
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1000 samples with 70/30 class imbalance is appropriate; curve shape
+          is realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pygal, Style, sklearn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct), but implementation includes
+          unnecessary sklearn dependency for sample data
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/gain-curve/metadata/seaborn.yaml b/plots/gain-curve/metadata/seaborn.yaml
index a591cf45ef..4728c7da24 100644
--- a/plots/gain-curve/metadata/seaborn.yaml
+++ b/plots/gain-curve/metadata/seaborn.yaml
@@ -24,3 +24,179 @@ review:
     offers little advantage over pure matplotlib
   - Legend placement in lower right partially overlaps the data region; upper left
     would be cleaner
+  image_description: 'The plot displays a cumulative gains chart with a 16:9 landscape
+    format on a white background with subtle gray gridlines. The title "gain-curve
+    · seaborn · pyplots.ai" is prominently displayed at the top in a large black font.
+    The X-axis is labeled "Customers Targeted (%)" ranging from 0 to 100, and the
+    Y-axis is labeled "Churners Captured (%)" ranging from 0 to ~105. Three lines
+    are shown: (1) a solid dark blue line labeled "Churn Prediction Model" showing
+    the cumulative gains curve with a steep initial slope that flattens as it approaches
+    100%; (2) a gray dashed diagonal line labeled "Random Selection (Baseline)" representing
+    random model performance; (3) a yellow/gold dotted line labeled "Perfect Model"
+    that rises steeply to 100% then remains horizontal. A light blue semi-transparent
+    fill highlights the area between the model curve and the baseline diagonal, illustrating
+    the model''s lift over random selection. The legend is positioned in the lower
+    right corner and is clearly readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths (3.5 for model, 2.5 for others) are excellent for this
+          chart type
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray/yellow palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Customers Targeted (%)" and "Churners
+          Captured (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle with alpha=0.3, but legend could be positioned better
+          (upper left would avoid the data area more)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative gains chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative positives captured - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has model curve, diagonal baseline reference, and perfect model curve
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show 0-100% on X and 0-105% on Y, appropriate for the data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels are accurate and descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows model performance above baseline with good lift, steep initial
+          slope, includes perfect model for comparison. Minor deduction: could show
+          more variation in the curve shape'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer churn prediction is a realistic, neutral business scenario
+          matching spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1000 samples with ~30% positive class is reasonable; values are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot and sns.set_style, but these are basic seaborn
+          features. The shaded area uses matplotlib's fill_between rather than a seaborn-specific
+          feature
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/altair.yaml b/plots/gantt-basic/metadata/altair.yaml
index 05912873df..3968adf6fc 100644
--- a/plots/gantt-basic/metadata/altair.yaml
+++ b/plots/gantt-basic/metadata/altair.yaml
@@ -28,3 +28,181 @@ review:
     2024 instead of Timeline)
   - Could demonstrate more Altair-specific features like layered marks or selections
   - Minor whitespace imbalance between chart area and margins
+  image_description: 'The plot displays a well-organized Gantt chart for a software
+    development project with 12 tasks. Tasks are represented as horizontal bars with
+    rounded corners spanning from January 2024 to April 2024. The bars are color-coded
+    by five project phases: Planning (Python Blue #306998), Design (Python Yellow
+    #FFD43B), Development (Green #4C9A2A), Testing (Red-orange #C73E1D), and Deployment
+    (Purple #8B5CF6). Tasks are logically ordered from bottom to top by start date,
+    with "Requirements" at the bottom and "Deployment" at the top. The x-axis shows
+    dates labeled "Timeline" with date ticks in "%b %d" format at -45° angle. The
+    y-axis shows task names labeled "Tasks". A legend titled "Phase" appears on the
+    right side with large, clearly visible symbols. The title "gantt-basic · altair
+    · pyplots.ai" is centered at the top. Grid lines are subtle (dashed, low opacity).
+    The chart clearly shows task overlaps and project timeline progression.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 20pt, tick labels 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, task labels have adequate space, x-axis labels
+          angled to prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar height of 35px with cornerRadius creates clear, well-sized task
+          bars for 12 tasks
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors that are colorblind-safe (blue, yellow, green,
+          red-orange, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions with 1400x800 chart size, legend well-placed on
+          right; slight deduction for extra whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Timeline" and "Tasks" are descriptive but lack units/context detail'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), legend well-placed but symbols
+          could integrate better
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart representing Gantt timeline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows time (start to end), Y-axis shows tasks
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has task bars, category color coding, proper date formatting, logical
+          task ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 tasks fully visible, timeline spans entire project
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to all 5 phases
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "gantt-basic · altair · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, different durations, multiple phases; could
+          show more variation in task lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project is a perfect, realistic Gantt chart
+          use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Task durations are realistic (1-6 weeks), timeline is logical; some
+          phases could show more duration variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: 'Uses deterministic dates (good) but no random seed needed; minor:
+          data is hardcoded so reproducible'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct scale_factor
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative encoding with x/x2 for ranges, tooltips,
+          and interactive HTML export; could leverage more Altair features like selections
+          or layering
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/bokeh.yaml b/plots/gantt-basic/metadata/bokeh.yaml
index 2fb95c2ad0..cd48777f88 100644
--- a/plots/gantt-basic/metadata/bokeh.yaml
+++ b/plots/gantt-basic/metadata/bokeh.yaml
@@ -25,3 +25,177 @@ review:
   - Missing HoverTool for task details on hover (Bokeh key interactive feature)
   - Generates both PNG and HTML but only PNG is required
   - Axis label Timeline could be more descriptive
+  image_description: 'The plot displays a Gantt chart for a software development project
+    with 12 tasks arranged vertically. Task names appear on the left side (Requirements
+    Analysis, System Design, Database Schema, Backend API, Frontend UI, Unit Testing,
+    Integration, Documentation, System Testing, User Acceptance, Deployment, Training).
+    Horizontal bars span from each task''s start to end date. The x-axis shows "Timeline"
+    with dates from Jan 2025 to Apr 2025. Tasks are color-coded by category: blue
+    (Planning - 2 tasks), yellow/gold (Development - 4 tasks), green (Testing - 3
+    tasks), and red (Deployment - 3 tasks). A legend on the right side shows the four
+    categories. The title reads "gantt-basic · bokeh · pyplots.ai". The background
+    is light gray (#fafafa), bars have dark outlines, and vertical dashed grid lines
+    mark time intervals.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, task labels at 32pt, tick labels
+          at 28pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, task labels well-spaced on left
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with 0.65 height, good alpha (0.9), dark outlines
+          enhance visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, red are distinct and colorblind-friendly (no
+          red-green as only differentiator)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight excess whitespace on left for task labels
+          but reasonable
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Timeline" is descriptive but lacks date format specification'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha 0.5, legend well-placed
+          but slightly far from plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar/Gantt chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on Y-axis, time on X-axis, correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has task names, start/end dates, category color coding, chronological
+          ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, x-range properly extended for labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 4 categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "gantt-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, sequential tasks, parallel work, varying
+          durations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project is a perfect real-world Gantt chart
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic 4-month project timeline with sensible task durations
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random), but no explicit seed statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also plot.html (extra file, minor issue)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, hbar, datetime axis, custom legend with LegendItem,
+          but could use HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/highcharts.yaml b/plots/gantt-basic/metadata/highcharts.yaml
index 251d4ad451..ce5bafe777 100644
--- a/plots/gantt-basic/metadata/highcharts.yaml
+++ b/plots/gantt-basic/metadata/highcharts.yaml
@@ -23,3 +23,177 @@ review:
   - Legend symbols are relatively small compared to the large canvas
   - Could add task duration in tooltip for enhanced interactivity
   - Left margin (420px) is slightly larger than necessary
+  image_description: 'The plot displays a well-structured Gantt chart showing a software
+    project timeline from January to April 2025. The chart has 12 tasks listed vertically
+    on the left (Requirements Analysis, System Design, Database Schema, Backend API,
+    Frontend UI, Integration, Unit Testing, System Testing, User Acceptance, Documentation,
+    Training, Go Live). Horizontal bars represent task durations with four distinct
+    colors: blue (#306998) for Planning tasks, yellow (#FFD43B) for Development, purple
+    (#9467BD) for Testing, and cyan (#17BECF) for Deployment. A prominent red vertical
+    line marks "Today (Feb 15)" with a label above it. The X-axis shows the timeline
+    with dates, and a legend at the bottom clearly identifies the four categories.
+    The layout is clean with good spacing between task bars.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (48px), task names clearly readable (26px),
+          axis labels readable (28-32px)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all task names and dates are distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Task bars are well-sized with good pointWidth (55px), rounded corners,
+          and borders; slightly reduced for minor bar height uniformity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette using blue, yellow, purple, cyan - no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with appropriate margins; slight deduction for
+          somewhat large left margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Tasks" and "Timeline" axis titles are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well-placed at bottom; legend symbols could
+          be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Gantt chart using xrange series type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on Y-axis, time on X-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has horizontal bars, time axis, category color coding, "today" line,
+          and logical task ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full timeline visible from January to April
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all four categories with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "gantt-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, sequential dependencies, multiple categories;
+          slight deduction for not showing task dependencies explicitly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real software development project scenario with recognizable phases
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Timeline spans 4 months which is realistic; some tasks could have
+          slightly more varied durations
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (fixed dates, no random)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly but also saves plot.html (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses xrange module for Gantt, plotLines for today marker, proper
+          datetime handling; could leverage more Highcharts-specific features like
+          tooltips with task duration
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/letsplot.yaml b/plots/gantt-basic/metadata/letsplot.yaml
index 73bfa7e29a..90ede440d0 100644
--- a/plots/gantt-basic/metadata/letsplot.yaml
+++ b/plots/gantt-basic/metadata/letsplot.yaml
@@ -25,3 +25,183 @@ review:
   - Blue and teal colors for Planning and Deployment phases could be more distinct
     for colorblind accessibility
   - Cleanup code for lets-plot-images folder adds unnecessary complexity
+  image_description: 'The plot displays a professional Gantt chart for a software
+    development project with 12 tasks displayed as horizontal bars. Tasks are listed
+    vertically on the y-axis from "Requirements Gathering" at the top to "Deployment"
+    at the bottom, sorted by start date (earliest first). The x-axis shows the project
+    timeline labeled in weeks (Week 1 through Week 11) with angled labels. Each task
+    bar is color-coded by category/phase using a 6-color palette: blue (Planning),
+    purple (Design), green (Development), yellow/gold (Testing), red (Documentation),
+    and teal (Deployment). The legend on the right side clearly identifies each phase.
+    The title "gantt-basic · letsplot · pyplots.ai" is displayed at the top. Task
+    bars have square end markers and show varying durations and overlapping schedules
+    representing a realistic software project timeline.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels are clear, task names are readable,
+          week labels are appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, task labels well-spaced, angled x-axis
+          labels prevent overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good thickness, end markers visible, alpha=0.85
+          provides good visibility while allowing overlap perception
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, but blue/teal may be slightly similar for some
+          colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, plot fills appropriate space, legend
+          well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Tasks" and "Project Timeline" are descriptive but generic; x-axis
+          says "Project Timeline (Days from Start)" in code but displays as "Project
+          Timeline"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, legend is well-placed, but vertical
+          grid lines could be slightly more prominent for timeline reading
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Gantt chart with horizontal bars for tasks
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on y-axis, time on x-axis, categories for color
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has task bars, time axis, color coding by category; missing "current
+          date" vertical line mentioned in spec notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All tasks visible, timeline shows full project span
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 phases with accurate colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "gantt-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, varying durations, multiple categories,
+          sequential dependencies; could show more complex task dependencies
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project is an excellent, realistic scenario
+          with appropriate phases
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 12 tasks over ~10 weeks is realistic for a software project, task
+          durations are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no unnecessary functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded dates), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but `import os` and `shutil` are only for cleanup,
+          not core functionality
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with proper scale=3
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_segment for bars, scale_y_continuous with custom
+          labels, theme customization. Could better leverage lets-plot's interactive
+          HTML features or tooltip capabilities
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/matplotlib.yaml b/plots/gantt-basic/metadata/matplotlib.yaml
index 0505cbeebc..3f5017eb58 100644
--- a/plots/gantt-basic/metadata/matplotlib.yaml
+++ b/plots/gantt-basic/metadata/matplotlib.yaml
@@ -25,3 +25,187 @@ review:
   - Axis labels are generic (Timeline, Tasks) - could be more descriptive like Project
     Timeline (2025) and Project Tasks
   - Could leverage more matplotlib features like annotations for key milestones
+  image_description: 'The plot displays a horizontal Gantt chart showing a software
+    development project timeline. There are 12 tasks arranged vertically from top
+    to bottom: Requirements Analysis, System Design, Database Setup, Backend Development,
+    Frontend Development, API Integration, Unit Testing, Documentation, Integration
+    Testing, User Acceptance Testing, Deployment Prep, and Go Live. Each task is represented
+    by a horizontal bar spanning from its start date to end date. The bars are color-coded
+    by category: dark blue (Python Blue #306998) for Planning tasks, yellow (#FFD43B)
+    for Development tasks, light blue (#4B8BBE) for Testing tasks, and gray (#646464)
+    for Deployment tasks. A vertical dashed red line marks "Today" (March 15). The
+    x-axis shows dates from early January to early May 2025 with bi-weekly Monday
+    intervals (e.g., Jan 13, Jan 27, Feb 10). The title reads "gantt-basic · matplotlib
+    · pyplots.ai" in bold. A legend in the upper right identifies the four categories
+    plus the Today marker. The plot has subtle vertical grid lines and clean styling
+    with top/right spines removed.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, task names
+          at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels are rotated 45° to prevent overlap,
+          task names have adequate spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar height is well-proportioned (0.6), bars are clearly visible with
+          good spacing between them, alpha=0.9 provides good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Python Blue, yellow, light blue, and gray - good contrast and
+          colorblind-safe palette (no red-green confusion as primary differentiator)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, legend positioned in upper
+          right without obscuring data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Timeline" and "Tasks" are descriptive but lack units/context (timeline
+          could indicate "Date" or similar)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.3, x-axis only), legend well-placed with clear
+          entries including the Today marker
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart representing Gantt timeline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on Y-axis, time on X-axis, duration correctly calculated
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has all spec features: horizontal bars, time axis, color coding
+          by category, "today" vertical line, logical ordering by start date'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows all dates with 3-day padding on both ends
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 categories plus the Today marker
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "gantt-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, sequential tasks, different durations, multiple
+          categories, demonstrates real project phases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Software development project with realistic phases: Planning → Development
+          → Testing → Deployment'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reasonable task durations (1-5 weeks), realistic project timeline
+          (Jan-May 2025), 12 tasks within the recommended 5-30 range
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple flat structure: imports → data → plot → save, no functions
+          or classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: 'Data is fully deterministic (hardcoded dates, no random data), so
+          this item should receive full points (3/3). Note: the recorded score of 0 and
+          passed: false were not updated to match this correction.'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (datetime, timedelta, mdates, plt, Patch)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API (barh, set_yticks, etc.)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses matplotlib features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib.dates for date formatting/locating, Patch for custom
+          legend, barh for horizontal bars. Could have used additional features like
+          annotations for milestones or progress indicators.
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/plotly.yaml b/plots/gantt-basic/metadata/plotly.yaml
index 3eca5efd9d..cbc32c6966 100644
--- a/plots/gantt-basic/metadata/plotly.yaml
+++ b/plots/gantt-basic/metadata/plotly.yaml
@@ -26,3 +26,179 @@ review:
   weaknesses:
   - Grid lines are very subtle (alpha=0.1) - increasing to 0.2-0.3 would improve readability
   - No hover customization to show task details in interactive version
+  image_description: 'The plot displays a Gantt chart titled "Software Development
+    Project · gantt-basic · plotly · pyplots.ai" with 12 horizontal bars representing
+    project tasks. The x-axis shows "Timeline (2025)" with dates from Jan 12 to Apr
+    20, formatted as "Mon DD". The y-axis is labeled "Project Tasks" with task names
+    including Requirements Analysis, System Design, Database Design, UI/UX Design,
+    Backend Development, Frontend Development, API Integration, Unit Testing, Integration
+    Testing, User Acceptance Testing, Documentation, and Deployment & Launch. Five
+    categories are color-coded: Planning (dark blue #306998), Design (yellow #FFD43B),
+    Development (teal #4ECDC4), Testing (coral/salmon #FF6B6B), and Deployment (light
+    green #95E1A3). A red dashed vertical line labeled "Today" marks Feb 20. The legend
+    is positioned horizontally at the top. Tasks are sorted by start date with overlapping
+    bars clearly showing concurrent activities.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 16-18pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; task names have adequate spacing; legend positioned
+          above plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Horizontal bars well-sized with 0.3 bargap; white border lines provide
+          good separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors with good contrast; avoids pure red-green pairing
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; left margin accommodates task names; slight extra
+          space on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Timeline (2025)" and "Project Tasks" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend well placed; however grid alpha=0.1 is very subtle (could
+          be slightly more visible)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Gantt chart using px.timeline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Start/end dates on x-axis, tasks on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has task names, start/end dates, category color-coding, and "today"
+          marker line
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All tasks visible with proper axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{descriptive} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, multiple categories, varying durations;
+          could show more variation in task overlaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project is a perfect, realistic use case for
+          Gantt charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Task durations (1-6 weeks) and timeline (Jan-Apr) are realistic;
+          some durations could be more varied
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded dates and task names)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only datetime, pandas, and plotly.express used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly.express API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (both appropriate)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses px.timeline which is ideal for Gantt charts; includes interactive
+          HTML export; could leverage hover customization more
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/plotnine.yaml b/plots/gantt-basic/metadata/plotnine.yaml
index b5be3d71e1..5f8474941f 100644
--- a/plots/gantt-basic/metadata/plotnine.yaml
+++ b/plots/gantt-basic/metadata/plotnine.yaml
@@ -24,3 +24,178 @@ review:
     for some colorblind users; consider using a colorblind-safe palette
   - The lineend="butt" parameter in geom_segment may cause deprecation warnings in
     newer plotnine versions
+  image_description: 'The plot displays a horizontal Gantt chart for a software development
+    project. It shows 10 tasks (Requirements, UI Design, Backend Architecture, Database
+    Setup, API Development, Frontend Development, Integration, Testing, Documentation,
+    Deployment) arranged vertically on the y-axis. The x-axis shows dates from mid-January
+    2025 to mid-March 2025 with rotated date labels. Each task is represented by a
+    horizontal bar spanning from start to end date. Tasks are color-coded by phase:
+    Planning (dark blue), Design (yellow), Development (light blue), QA (gray), and
+    Deployment (green). A dashed red vertical line indicates the current date (Feb
+    10, 2025). The legend on the right shows the Phase categories. The title reads
+    "gantt-basic · plotnine · pyplots.ai" in bold.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis labels 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, task names fit well, x-axis labels are rotated
+          to avoid overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segment bars have good thickness (size=12), clearly visible and distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color contrast overall, but yellow and light blue could be difficult
+          for some colorblind users (-1)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, plot fills appropriate space, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Date", "Task") but no units (-1)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle vertical grid lines (alpha=0.5), legend well placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Gantt chart with horizontal bars representing task durations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on Y-axis, dates on X-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has all spec features: horizontal bars, time axis, color coding
+          by category, current date marker, logical ordering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within chart bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Phase categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "gantt-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, varying durations, multiple categories,
+          but all tasks are sequential with no parallel dependencies shown explicitly
+          (-1)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project is a perfect realistic context for Gantt
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates and durations are plausible, though some tasks like "Deployment"
+          seem short at only 4 days (-1)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic datetime data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses lineend="butt" which generates a warning in newer plotnine versions
+          (-1)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_segment for bars, scale_color_manual
+          for custom colors, theme customization. Could have used faceting or additional
+          plotnine-specific features for enhancement.
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/pygal.yaml b/plots/gantt-basic/metadata/pygal.yaml
index 23632197c4..26fabf9517 100644
--- a/plots/gantt-basic/metadata/pygal.yaml
+++ b/plots/gantt-basic/metadata/pygal.yaml
@@ -24,3 +24,181 @@ review:
   - Legend color boxes are relatively small compared to the large canvas size
   - Missing current date vertical line marker mentioned in spec notes as a consideration
   - Red and green colors used together could be improved for colorblind accessibility
+  image_description: 'The plot displays a Gantt chart titled "Software Development
+    Timeline · gantt-basic · pygal · pyplots.ai" showing 12 software development tasks.
+    Tasks are displayed as horizontal bars on a white background with task names on
+    the left y-axis (Requirements Analysis at bottom, Deployment at top). The x-axis
+    shows a timeline from January to April 2025 with month markers (Feb 1, Mar 1,
+    Apr 1) indicated by vertical dashed gray lines. Tasks are color-coded by category:
+    Planning (blue - #306998), Design (yellow - #FFD43B), Development (green - #2E8B57),
+    Testing (red/crimson - #DC143C), and Deployment (purple - #9370DB). A legend at
+    the bottom shows all five categories. The bars have rounded corners and show task
+    overlaps clearly (e.g., System Design overlaps with Requirements Analysis). Text
+    is legible with monospace font styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable, good font sizes for 4800x2700
+          canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all task names clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good spacing, clearly visible against white
+          background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color palette with distinct hues, though red-green combination
+          could be improved for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, plot fills most of the available space
+          with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive task names, X-axis has "Timeline (2025)" label
+          but no unit clarification
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Dashed month guides are subtle, legend at bottom is functional but
+          small colored squares
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Gantt chart with horizontal bars representing tasks
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on Y-axis, time on X-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows task durations, overlaps, color coding by category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full date range visible from Jan to late April
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 categories with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Software Development Timeline · gantt-basic
+          · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, sequential dependencies, multiple categories;
+          could show more variation in task lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project is a classic, realistic Gantt chart
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates and durations are realistic, though some tasks may be slightly
+          short for real projects
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear flow, but uses custom SVG injection which adds complexity
+          (necessary for Gantt in pygal)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data with fixed dates, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creative use of pygal HorizontalBar with custom SVG injection, custom
+          Style, cairosvg for PNG export. Shows advanced pygal understanding, though
+          native Gantt support would be preferred if available.
+  verdict: APPROVED
diff --git a/plots/gantt-basic/metadata/seaborn.yaml b/plots/gantt-basic/metadata/seaborn.yaml
index 9367295660..2585b3464a 100644
--- a/plots/gantt-basic/metadata/seaborn.yaml
+++ b/plots/gantt-basic/metadata/seaborn.yaml
@@ -27,3 +27,182 @@ review:
     Development bars - should be moved outside the plot area or to upper right
   - The grid on y-axis (alpha=0.15) is unnecessary for a Gantt chart and adds visual
     noise
+  image_description: 'The plot displays a horizontal Gantt chart for a software development
+    project timeline spanning January to April 2025. It shows 12 tasks organized from
+    top to bottom: Training, Deployment, User Acceptance Testing, Integration Testing,
+    Documentation, API Integration, Unit Testing, Frontend Development, Backend Development,
+    Database Design, UI/UX Design, and Requirements Analysis. Each task is represented
+    as a horizontal bar with its position on the x-axis indicating the timeframe.
+    Tasks are color-coded by category: blue for Planning (Requirements Analysis),
+    yellow/gold for Design (UI/UX Design, Database Design), light blue for Development
+    (Backend, Frontend, API Integration), gray for Testing (Unit, Integration, User
+    Acceptance), brown for Documentation, and green for Deployment (Deployment, Training).
+    A red dashed vertical line indicates "Today" (Feb 15, 2025). The legend is positioned
+    in the lower right corner. The x-axis shows dates in "Mon DD, YYYY" format with
+    labels rotated 45 degrees. The title "gantt-basic · seaborn · pyplots.ai" is displayed
+    prominently at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis labels at 20pt, tick labels at 14-16pt,
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels rotated to prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar sizes are well-proportioned for the 12 tasks, good alpha and
+          edge styling
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Custom colorblind-friendly palette with distinct colors for each
+          category
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout, but legend in lower right slightly overlaps
+          with the data area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Timeline (2025)" includes year context, "Tasks" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but legend overlaps with Frontend Development and
+          Backend Development bars
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal Gantt chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Tasks on Y-axis, time on X-axis, bars span start to end dates
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has task bars, time axis, category color coding, "today" line, logical
+          ordering by start date
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full project timeline with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 categories plus "Today" line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "gantt-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overlapping tasks, varying durations, multiple categories,
+          and different project phases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project timeline is a perfect real-world use
+          case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic project timeframe (~3 months), sensible task durations
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → processing → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but technically could
+          benefit from explicit note
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn and matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn barplot with hue for category coloring, uses seaborn's
+          set_style and set_context for styling - good library usage but Gantt charts
+          aren't a native seaborn strength
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/altair.yaml b/plots/gauge-basic/metadata/altair.yaml
index e83dcbdc92..e51fad761b 100644
--- a/plots/gauge-basic/metadata/altair.yaml
+++ b/plots/gauge-basic/metadata/altair.yaml
@@ -23,3 +23,175 @@ review:
   weaknesses:
   - Code contains a function definition value_to_angle() which violates the KISS structure
     requirement (imports → data → plot → save, no functions)
+  image_description: 'The plot displays a semi-circular gauge chart with three colored
+    zones: red (0-30), yellow (30-70), and green (70-100). A dark blue needle points
+    to approximately the 72 mark, which falls in the green zone. The title "gauge-basic
+    · altair · pyplots.ai" appears at the top in large text. Threshold values "30"
+    and "70" are labeled above the arc at their respective positions. Min (0) and
+    max (100) values are displayed at the left and right ends of the gauge. A prominent
+    "72%" label is centered below the gauge hub. The layout uses a white background
+    with good contrast.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, value label "72%" is bold and prominent,
+          all threshold and range labels are readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Arc segments are clearly visible with distinct colors, needle is
+          thick and clearly points to the value
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Red/yellow/green is intuitive for gauges but not ideal for colorblind
+          users; however, the zones are also labeled with numbers
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with the gauge centered, though there is
+          some empty space at the bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Not applicable for gauge charts (no traditional axes), but min/max
+          labels are present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed for gauge; color zones are self-explanatory
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semi-circular gauge chart as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value correctly mapped to needle position at 72
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: needle indicator, color zones (red/yellow/green),
+          value label, min/max bounds, thresholds at 30 and 70'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range 0-100 is visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Threshold labels and range labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "gauge-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows gauge with value in green zone; could also demonstrate edge
+          cases but current value (72) effectively shows the concept
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales performance at 72% is a realistic business KPI scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-100 percentage scale is appropriate; thresholds at 30/70 are reasonable
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains a function definition `value_to_angle()` which violates
+          KISS principle (no functions/classes rule)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (fixed values, no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (altair, numpy, pandas) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative layering with `alt.layer()`,
+          custom arc marks with `mark_arc()`, rule marks for needle, and proper theta
+          encoding for radial positioning
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/bokeh.yaml b/plots/gauge-basic/metadata/bokeh.yaml
index cc8f1d90ca..c93d24b5dc 100644
--- a/plots/gauge-basic/metadata/bokeh.yaml
+++ b/plots/gauge-basic/metadata/bokeh.yaml
@@ -25,3 +25,169 @@ review:
     using blue/orange/green or adding patterns
   - Could use Bokeh annular_wedge glyph for simpler arc rendering instead of manual
     polygon patches
+  image_description: 'The plot displays a semi-circular gauge chart with three color
+    zones: red (0-30), yellow (30-70), and green (70-100). The gauge has a dark blue
+    needle pointing to approximately 72, which falls in the green zone. A center hub
+    circle anchors the needle. Tick marks appear at 0, 25, 50, 75, and 100 with corresponding
+    numeric labels positioned outside the arc. The current value "72" is prominently
+    displayed in bold blue text below the gauge center. The title "gauge-basic · bokeh
+    · pyplots.ai" appears at the top. The overall layout is clean with good use of
+    whitespace and a balanced composition on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title at 36pt, tick labels at 24pt, value at 48pt - all clearly
+          readable. Minor: title could be slightly larger for a 4800px canvas'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements whatsoever
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Gauge arc segments are well-sized, needle is clearly visible, center
+          hub appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Red/yellow/green zones are distinguishable; however, red-green colorblind
+          users may have difficulty distinguishing the two extreme zones
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space, gauge is well-centered with balanced
+          margins
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid needed for gauge; no legend needed as zones are self-explanatory
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semi-circular gauge chart as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value of 72 correctly mapped to needle position
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: color zones (red/yellow/green), needle indicator,
+          value label, tick marks'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range 0-100 displayed with appropriate tick marks
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for gauge, zones self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "gauge-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows gauge with value in green zone; could also demonstrate values
+          in other zones
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Value of 72 out of 100 is a plausible KPI/score scenario (matches
+          spec example)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-100 scale with thresholds at 30 and 70 is sensible; exactly matches
+          spec example
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (fixed values, no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh.io, bokeh.models, bokeh.plotting)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's figure, patch, circle, line, and Label correctly. Creates
+          custom geometry via patch polygons. Could leverage more Bokeh-specific features
+          like hover tooltips or annular_wedge for cleaner arc rendering.
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/highcharts.yaml b/plots/gauge-basic/metadata/highcharts.yaml
index b61a4758da..04e4451ea3 100644
--- a/plots/gauge-basic/metadata/highcharts.yaml
+++ b/plots/gauge-basic/metadata/highcharts.yaml
@@ -24,3 +24,13 @@ review:
     canvas)
   - Title format uses middle dot separators correctly but position could be higher
     for better spacing
+  image_description: 'The plot displays a semi-circular gauge chart with a dark blue
+    needle pointing to the value 72. The gauge spans from 0 to 100 with tick marks
+    and numeric labels at every 2 units. The title "gauge-basic · highcharts · pyplots.ai"
+    appears at the top in bold black text. Three color zones are visible: purple (0-30)
+    on the left for the low zone, golden/yellow (30-70) in the middle zone, and dark
+    blue (70-100) on the right for the high zone. The value "72" is prominently displayed
+    as a large bold number below the gauge center. The axis label "Performance (%)"
+    appears in the center of the gauge. The background is white with subtle gradient
+    effects on the gauge dial frame.'
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/letsplot.yaml b/plots/gauge-basic/metadata/letsplot.yaml
index 7745c1ffd3..9ef7c99510 100644
--- a/plots/gauge-basic/metadata/letsplot.yaml
+++ b/plots/gauge-basic/metadata/letsplot.yaml
@@ -25,3 +25,172 @@ review:
     users
   - Could add intermediate tick marks or labels for better readability of values between
     0-100
+  image_description: 'The plot displays a semi-circular gauge chart (speedometer style)
+    with three distinct color zones: red on the left (0-30 range), yellow in the middle
+    (30-70 range), and green on the right (70-100 range). A dark navy/black needle
+    points to the value 72, which falls in the green zone. The gauge has a dark circular
+    pivot point at the center base. Min value "0" is labeled on the left edge and
+    max value "100" on the right edge. The current value "72" is prominently displayed
+    in large bold text below the gauge. The title "gauge-basic · letsplot · pyplots.ai"
+    appears in the top-left corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, value label, and min/max labels are all clearly readable.
+          Title could be slightly larger but is acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Gauge zones are clearly visible with good contrast, needle is prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses red/yellow/green which is intuitive but not fully colorblind-safe
+          (red-green distinction)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Gauge is well-centered, good use of canvas space for a semi-circular
+          design
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels needed for gauge, but min/max labels serve this purpose
+          (N/A for gauge type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly hidden, clean background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semi-circular gauge chart as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value of 72 correctly positioned between min (0) and max (100)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has color zones (red/yellow/green), needle indicator, prominent value
+          display, min/max labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range 0-100 visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, correctly hidden
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "gauge-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all gauge features (zones, needle, value display), but only
+          shows one value state
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales performance at 72% is a realistic business metric scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-100 scale is appropriate, thresholds at 30/70 are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (fixed values, no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot's ggplot2-style grammar with geom_polygon, geom_segment,
+          and custom theming. Could leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/matplotlib.yaml b/plots/gauge-basic/metadata/matplotlib.yaml
index ee7f9c3a71..cdc28da434 100644
--- a/plots/gauge-basic/metadata/matplotlib.yaml
+++ b/plots/gauge-basic/metadata/matplotlib.yaml
@@ -23,3 +23,167 @@ review:
   weaknesses:
   - Red/yellow/green color scheme follows spec convention but is not colorblind-accessible
   - Could add minor tick marks between major values for finer reading
+  image_description: 'The plot displays a semi-circular gauge chart with a white background.
+    The gauge arc is divided into three colored zones: red (0-30), yellow (30-70),
+    and green (70-100). A dark navy needle points to the value 72, which falls in
+    the green zone. The needle has a round center cap. Tick marks with labels (0,
+    25, 50, 75, 100) are positioned around the outer edge of the arc. Below the gauge,
+    the value "72" is prominently displayed in large blue text, with "Current Sales"
+    in smaller gray text beneath it. The title "gauge-basic · matplotlib · pyplots.ai"
+    appears at the top of the image.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text clearly readable; title 24pt, tick labels 18pt bold, value
+          48pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean label positioning
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Gauge elements well-sized: thick needle, visible wedges, prominent
+          center cap'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses spec-required red/yellow/green convention, but not colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced composition
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for gauge; contextual "Current Sales" label present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Clean design, no grid/legend needed for gauge
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semi-circular gauge chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value 72 on 0-100 range with thresholds at 30/70
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Needle, color zones, value label, tick marks all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100 range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A; contextual label present
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct: "gauge-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows value in green zone, all three color zones visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: '"Current Sales" at 72% is plausible business metric'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 0-100 with 30/70 thresholds makes intuitive sense
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data but no seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of mpatches.Wedge, plt.Circle, custom coordinate transforms
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/plotly.yaml b/plots/gauge-basic/metadata/plotly.yaml
index a754c4729d..835ba41c91 100644
--- a/plots/gauge-basic/metadata/plotly.yaml
+++ b/plots/gauge-basic/metadata/plotly.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Red/yellow/green color scheme may not be fully accessible to colorblind users;
     consider adding patterns or alternative color palette
+  image_description: 'The plot displays a semi-circular gauge chart showing "Sales
+    Target Achievement" with a prominent "72%" value displayed in the center. The
+    gauge has three color zones: red (0-30%), yellow (30-70%), and green (70-100%),
+    following the intuitive red=bad, yellow=caution, green=good convention. A dark
+    blue (Python blue #306998) bar indicates the current value position at 72%, placing
+    it just into the green zone. Tick marks appear at 10% intervals (0%, 10%, 20%...
+    100%) around the arc with percentage labels. The title "Sales Target Achievement"
+    appears at the top with the subtitle "gauge-basic · plotly · pyplots.ai" below
+    it. The overall layout is clean with a white background and good use of canvas
+    space.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, value (72%), and tick labels are all clearly readable
+          at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Gauge arc, color zones, and indicator bar are clearly visible and
+          well-proportioned
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Red/yellow/green zones are distinguishable, though red-green colorblind
+          users may have difficulty differentiating extreme zones
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization; gauge fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Tick labels include "%" suffix, making units clear
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for gauge charts (no grid or legend needed)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct gauge/speedometer chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value correctly mapped to gauge position
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: value display, min/max range, color zones
+          with thresholds'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100% range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for single-value gauge)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "gauge-basic · plotly · pyplots.ai" format in subtitle
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows value in yellow-green transition zone; could have demonstrated
+          a value in red or deep green zone as alternative
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales target achievement is a realistic, relatable business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 72% is a sensible value; thresholds at 30/70 are reasonable for performance
+          tiers
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded value=72), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only `plotly.graph_objects` imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Plotly's `go.Indicator` with gauge mode, threshold marker, and
+          HTML export for interactivity; could leverage more interactive features
+          like hover tooltips
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/plotnine.yaml b/plots/gauge-basic/metadata/plotnine.yaml
index 1fcf1092a3..9e1eb8e51c 100644
--- a/plots/gauge-basic/metadata/plotnine.yaml
+++ b/plots/gauge-basic/metadata/plotnine.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/pygal.yaml b/plots/gauge-basic/metadata/pygal.yaml
index 18401d2a7f..af123ea5fd 100644
--- a/plots/gauge-basic/metadata/pygal.yaml
+++ b/plots/gauge-basic/metadata/pygal.yaml
@@ -23,3 +23,165 @@ review:
     - only the current zone color shows
   - Title format includes extra text (Sales Performance) before the required format
   - Legend is positioned far from the gauge in the bottom-left corner
+  image_description: 'The plot displays a semi-circular (half-pie) solid gauge chart
+    with a green (#2ECC71) filled arc representing 72% progress. The gauge has an
+    inner radius creating a donut-style appearance. The value "72%" is displayed twice
+    - once inside the colored arc and once at the bottom right of the gauge in the
+    unfilled (light gray) section. The title "Sales Performance · gauge-basic · pygal
+    · pyplots.ai" appears at the top in dark text. A legend at the bottom left shows
+    "Current Sales: 72% (Good Zone)" with a small green marker. The background is
+    white, and the overall layout is clean with the gauge centered in the canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and value labels are clearly readable; legend text is slightly
+          small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Gauge arc is clearly visible with good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green color is distinguishable; single-color gauge doesn't require
+          colorblind considerations
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good centering but legend is isolated at bottom-left corner
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend present and informative
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semi-circular gauge chart using SolidGauge
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value correctly mapped to gauge fill
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows value, range (0-100), and color zone; missing visible threshold
+          boundaries
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100 range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows value and zone
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes spec-id, library, and pyplots.ai but adds "Sales Performance"
+          prefix
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows single value gauge well; doesn't demonstrate threshold zones
+          visually
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales performance percentage is a realistic business KPI scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 72% is a realistic sales performance value
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded value), but no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Effectively uses pygal's SolidGauge with half_pie, inner_radius,
+          value_formatter, and custom Style
+  verdict: APPROVED
diff --git a/plots/gauge-basic/metadata/seaborn.yaml b/plots/gauge-basic/metadata/seaborn.yaml
index 2556a73eab..b0a178c254 100644
--- a/plots/gauge-basic/metadata/seaborn.yaml
+++ b/plots/gauge-basic/metadata/seaborn.yaml
@@ -23,3 +23,174 @@ review:
   - Zone labels could be slightly larger for better readability at full resolution
   - Needle tip marker (red triangle) adds visual interest but is unconventional for
     gauge charts
+  image_description: 'The plot displays a semi-circular gauge chart showing "Sales
+    Performance" at 72%. The gauge has three colored zones: a red/coral "Low" zone
+    on the left (0-30), an orange "Medium" zone in the middle (30-70), and a green
+    "High" zone on the right (70-100). A dark blue needle points to 72, which is in
+    the High zone. The needle has a red triangular tip for visibility. The value "72%"
+    is prominently displayed below the gauge in large blue text. Zone labels ("Low",
+    "Medium", "High") are displayed in white with dark outlines on their respective
+    colored zones. Min (0) and max (100) values are labeled at the gauge endpoints.
+    The title "gauge-basic · seaborn · pyplots.ai" appears at the top, with "Sales
+    Performance" as a subtitle below. The background is clean white.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable. Title, value, and labels have good
+          font sizes. Zone labels could benefit from slightly larger fonts.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Gauge zones are clearly visible using seaborn scatterplot; needle
+          and hub are prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind palette correctly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, gauge fills appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for gauge chart (no axes), but min/max labels present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean design, no distracting elements
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semi-circular gauge chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value correctly mapped to needle position
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: value display, thresholds/zones, needle indicator,
+          min/max labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100 range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Zone labels present but no formal legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: gauge-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows zones, needle position, value display. Could show a value in
+          a different zone to demonstrate full range.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales performance is a realistic business KPI scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 72% is a sensible sales performance value; thresholds at 30/70 are
+          reasonable
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn scatterplot, lineplot, and colorblind palette creatively
+          for gauge construction. However, this is a creative workaround since seaborn
+          doesn't have native gauge support.
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/altair.yaml b/plots/heatmap-annotated/metadata/altair.yaml
index 2c933c7b83..ed77f70623 100644
--- a/plots/heatmap-annotated/metadata/altair.yaml
+++ b/plots/heatmap-annotated/metadata/altair.yaml
@@ -24,3 +24,178 @@ review:
     context
   - Does not utilize Altair interactive features (tooltips) in the HTML output
   - Canvas utilization could be improved with slightly larger margins
+  image_description: 'The plot displays a 7×7 annotated correlation heatmap showing
+    relationships between business metrics: Sales, Marketing, R&D, Support, Revenue,
+    Growth, and Profit. The visualization uses a blue-orange diverging colormap (blueorange
+    scheme) with the scale ranging from -1 to 1. Each cell contains its correlation
+    coefficient formatted to 2 decimal places. The diagonal shows perfect correlations
+    (1.00) in dark orange. Strong positive correlations appear in orange/brown (e.g.,
+    Sales-Revenue: 0.86, R&D-Growth: 0.87, Revenue-Profit: 0.87), while the only negative
+    correlation visible is Support-Profit at -0.24 in light blue. Text annotations
+    use white color on dark backgrounds (|correlation| > 0.5) and black on light backgrounds
+    for optimal contrast. The title "heatmap-annotated · altair · pyplots.ai" appears
+    at the top. A vertical colorbar legend on the right shows the correlation scale.
+    The chart is square with equal-sized cells separated by white borders.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 18pt, annotations at 20pt bold - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, x-axis labels angled at -45° to prevent
+          collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell sizes well-proportioned, annotations clearly visible within
+          cells
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging blue-orange colormap is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but slightly compact; could use more padding
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels show "Variable" which is descriptive but generic without units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: White cell borders provide clean separation, legend well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct annotated heatmap implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped to variable names, values to color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: color intensity, text annotations, contrast
+          text, colorbar'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full correlation range -1 to 1 displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Correlation" with accurate scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "heatmap-annotated · altair · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows strong positive correlations and negative correlations, but
+          negative values are limited (only -0.24 visible)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business metrics correlation matrix is a highly realistic and common
+          use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Correlation values are realistic; deliberately engineered correlations
+          (Revenue from Sales/Marketing) work well
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (minor deviation but acceptable
+          for interactive library)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered marks (rect + text), conditional color encoding,
+          but could leverage more Altair-specific features like tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/bokeh.yaml b/plots/heatmap-annotated/metadata/bokeh.yaml
index 2ed0b07105..280a0515c1 100644
--- a/plots/heatmap-annotated/metadata/bokeh.yaml
+++ b/plots/heatmap-annotated/metadata/bokeh.yaml
@@ -22,3 +22,175 @@ review:
   - Generic axis labels "Variable" instead of more descriptive labels
   - Colorbar could include more context (e.g., "Pearson Correlation" instead of just
     "Correlation")
+  image_description: 'The plot displays an 8x8 correlation matrix heatmap for financial
+    metrics: Revenue, Profit, Assets, Debt, Growth, ROI, Market Cap, and Volume. The
+    visualization uses a diverging blue-white-red colormap where dark blue represents
+    strong negative correlations (-1), white represents no correlation (0), and dark
+    red represents strong positive correlations (+1). Each cell contains a numeric
+    annotation showing the correlation coefficient to 2 decimal places. The diagonal
+    shows perfect correlations (1.00) in dark red. Notable strong correlations include
+    Revenue-Profit (0.82), Revenue-Market Cap (0.58), and ROI-Profit (0.62). The colorbar
+    on the right displays the scale with "Correlation" label. X-axis labels are rotated
+    at the top of the chart, and both axes are labeled "Variable". The title correctly
+    follows the format "heatmap-annotated · bokeh · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 32pt, axis labels at 24pt,
+          tick labels at 18pt, and cell annotations at 24pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-axis labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized, annotations clearly visible within each cell
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging blue-white-red colormap is colorblind-friendly and provides
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format (3600x3600) appropriate for matrix, good use of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Both axes labeled "Variable" which is generic, not descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid properly disabled, but colorbar title could be more descriptive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct annotated heatmap type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned as matrix rows/columns
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: annotations, colorbar, contrast text
+          colors, diverging colormap'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full -1 to 1 correlation range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar shows correlation scale correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range: positive correlations, negative correlations,
+          near-zero values, and perfect diagonal'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Financial correlation matrix is plausible, correlations like Revenue-Profit
+          (0.82) make sense
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Correlation values properly bounded [-1, 1]
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and transform correctly, but doesn't leverage
+          Bokeh's interactive capabilities (tooltips showing exact values on hover
+          would be natural for this use case)
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/highcharts.yaml b/plots/heatmap-annotated/metadata/highcharts.yaml
index f4332f89ed..77ba2bb9c0 100644
--- a/plots/heatmap-annotated/metadata/highcharts.yaml
+++ b/plots/heatmap-annotated/metadata/highcharts.yaml
@@ -25,3 +25,182 @@ review:
     documented in prompts/library/highcharts.md
   - Annotation font size could be slightly larger for better readability at a distance
   - Colorbar legend labels are small relative to the chart size
+  image_description: The plot shows a 7×7 correlation matrix heatmap for financial
+    indicators (Revenue, Profit, Growth, ROI, Debt, Assets, Employees). The heatmap
+    uses a diverging colormap with blue for negative correlations (-1), white for
+    zero, and golden-yellow for positive correlations (+1). Each cell displays its
+    correlation value with 2 decimal places (e.g., 1.00 on the diagonal, -0.84 for
+    Growth-Assets). The title reads "heatmap-annotated · highcharts · pyplots.ai"
+    at the top. Both axes are labeled "Variables" with category labels on x-axis (bottom)
+    and y-axis (left). A vertical colorbar legend on the right shows the scale from
+    -1 to 1. Cell borders are white, creating clear separation between cells.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, labels, and annotations are clearly readable. Font sizes
+          are appropriate for the 3600×3600 canvas. Minor: annotation text could be
+          slightly larger for perfect visibility.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels and annotations are clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are appropriately sized with good visibility of all data elements.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue-white-yellow diverging colormap which is colorblind-safe
+          (avoids red-green).
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square aspect ratio is perfect for a correlation matrix. Good margins
+          and balanced whitespace.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Variables" labels are descriptive but lack context units (acceptable
+          for correlation which is unitless).'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar legend is well-positioned. White cell borders serve as grid.
+          Legend labels could be slightly larger.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap type with annotations.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly show categories, color correctly shows correlation
+          values.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: annotations, colorbar, diverging
+          colormap, contrast text.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, colorbar shows full -1 to 1 range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately reflects the correlation scale.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "{spec-id} · {library} · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive and negative correlations, diagonal ones, varying
+          magnitudes. Could show a few more extreme correlations (only -0.84 and -0.62
+          approach the extremes).
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial indicators correlation matrix is a realistic and comprehensible
+          scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Correlation values are in valid range [-1, 1]. Matrix is symmetric
+          as expected. Some values seem slightly random for financial data but acceptable.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean script structure: imports → data → chart options → rendering.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data generation.
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: true
+        comment: Imports json and urllib.request but these are necessary. However,
+          the code doesn't use highcharts-core library as recommended in library rules
+          - it uses raw JavaScript/JSON approach instead.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap module with dataLabels, colorAxis with stops.
+          However, doesn't use highcharts-core Python library as documented in library
+          rules, instead uses raw JSON/JavaScript approach.
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/letsplot.yaml b/plots/heatmap-annotated/metadata/letsplot.yaml
index ff7e05cb18..173271102a 100644
--- a/plots/heatmap-annotated/metadata/letsplot.yaml
+++ b/plots/heatmap-annotated/metadata/letsplot.yaml
@@ -24,3 +24,178 @@ review:
     be more descriptive)
   - Random correlation data could have more realistic sector relationships (e.g.,
     Tech-Finance correlation patterns)
+  image_description: The plot displays an 8×8 correlation matrix heatmap for stock
+    sectors (Tech, Finance, Healthcare, Energy, Consumer, Industrial, Materials, Utilities).
+    It uses a diverging colormap with blue (#2166AC) for negative correlations, white
+    (#F7F7F7) for near-zero values, and red (#B2182B) for positive correlations. Each
+    cell contains bold numeric annotations showing correlation coefficients to 2 decimal
+    places. The diagonal shows perfect correlation (1.00) as expected. Text color
+    dynamically contrasts with background - white text on dark cells (|value| > 0.5)
+    and black text on lighter cells. X-axis labels are rotated 45 degrees. A colorbar
+    legend labeled "Correlation" shows the scale from -1 to 1. The title correctly
+    follows the format "heatmap-annotated · lets-plot · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis labels 20pt, tick labels 16pt, annotations
+          visible and readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, 45-degree rotation prevents x-axis
+          label overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles well-sized, annotations clearly visible with appropriate contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging blue-white-red colormap is colorblind-friendly (RdBu variant)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but some extra whitespace on right side due to colorbar
+          placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Sector") but no units (appropriate for correlation
+          matrix)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed, white cell borders provide good separation, but
+          no grid (appropriate for heatmap)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap with annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to sectors, value mapped to color and annotation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Annotations present, auto-contrasting text color, colorbar included,
+          diverging colormap used
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all 8 sectors, colorbar shows full -1 to 1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Correlation" with correct scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "heatmap-annotated · lets-plot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows positive correlations, negative correlations, perfect diagonal,
+          values near zero
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Stock sector correlations are plausible but generated randomly rather
+          than based on real market data patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values properly bounded [-1, 1], symmetric matrix as expected
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as plot.png but also generates plot.html (minor: both outputs
+          are fine)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar with geom_tile, geom_text, scale_color_identity,
+          scale_fill_gradient2, theme_minimal. Good use of lets-plot features but
+          doesn't leverage advanced interactive features that lets-plot offers.
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/matplotlib.yaml b/plots/heatmap-annotated/metadata/matplotlib.yaml
index ac55504221..574aa67679 100644
--- a/plots/heatmap-annotated/metadata/matplotlib.yaml
+++ b/plots/heatmap-annotated/metadata/matplotlib.yaml
@@ -23,3 +23,174 @@ review:
   - Axis labels are generic ("Variables" instead of "Financial Metrics")
   - Grid linewidth=2 is slightly heavy; alpha or linewidth=1 would be more subtle
   - Does not leverage advanced matplotlib features like matshow() or custom Normalize
+  image_description: 'The plot displays an 8×8 annotated correlation heatmap showing
+    relationships between financial metrics: Revenue, Profit, Expenses, Growth, Risk,
+    ROI, Debt, and Assets. The heatmap uses a diverging RdBu_r colormap (red for positive,
+    blue for negative correlations) with values ranging from -1.0 to 1.0. Each cell
+    contains a bold numerical annotation with 2 decimal places. Text color automatically
+    contrasts with the background—white text on dark cells (high |correlation|), black
+    text on light cells (low |correlation|). The diagonal shows 1.00 values (self-correlation)
+    in dark red. A vertical colorbar on the right labeled "Correlation Coefficient"
+    shows the value scale. X-axis labels are rotated 45° for readability. White grid
+    lines separate cells cleanly.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt, annotations 14pt
+          bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell annotations perfectly sized relative to cells; clear visual
+          distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r diverging colormap is colorblind-safe; red-blue distinction
+          works for most color vision deficiencies
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format appropriate for correlation matrix; good use of canvas
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels say "Variables" without units; could be more descriptive (e.g.,
+          "Financial Metrics")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: White grid is clean but linewidth=2 is slightly heavy; colorbar well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct annotated heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y are variable labels, values are correlations
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has annotations, colorbar, automatic text contrast
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full -1 to 1 correlation range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "heatmap-annotated · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows positive correlations (0.85 Revenue-Profit), negative correlations
+          (-0.52 Profit-Debt), near-zero values, and diagonal=1
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial metrics correlation matrix is a real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Correlation values are realistic and within valid -1 to 1 range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic imshow() rather than matplotlib's newer matshow() or pcolormesh();
+          no use of Normalize or custom formatters
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/plotly.yaml b/plots/heatmap-annotated/metadata/plotly.yaml
index 099b3c3210..66f1a71153 100644
--- a/plots/heatmap-annotated/metadata/plotly.yaml
+++ b/plots/heatmap-annotated/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Axis labels are generic Sector - could be Stock Market Sector for more context
   - Colorbar thickness could be slightly larger for the canvas size
+  image_description: The plot displays an 8x8 sector correlation matrix heatmap showing
+    correlations between stock market sectors (Technology, Healthcare, Finance, Energy,
+    Consumer, Industrial, Materials, Utilities). The matrix uses a diverging RdBu
+    (Red-Blue) colorscale ranging from -1 (blue) to +1 (red), centered at 0 (white/cream).
+    Each cell contains its correlation value formatted to 2 decimal places. The diagonal
+    shows perfect correlation (1.00) in dark red. Text annotations use white font
+    on dark-colored cells (high absolute correlation) and black font on light-colored
+    cells for contrast. The title reads "Sector Correlation Matrix · heatmap-annotated
+    · plotly · pyplots.ai" at the top. Both axes are labeled "Sector" with rotated
+    x-axis labels at 45 degrees. A colorbar on the right shows the correlation scale.
+    The plot has square cells and uses the plotly_white template for a clean background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable, good font sizes (32pt title, 24pt axis
+          labels, 18pt annotations and ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text, rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: cells are perfectly sized, annotations clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu diverging colormap is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: good layout but cells could use slightly more canvas area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: descriptive labels "Sector" but no units (N/A for this context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: colorbar well-placed, no grid needed for heatmap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct annotated heatmap type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned as sector labels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'all spec features present: annotations, colorbar, contrast text,
+          diverging colormap'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: colorscale shows full -1 to 1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: colorbar correctly labeled "Correlation"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format "{description} · {spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows positive correlations, negative correlations, perfect diagonal,
+          and varying magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: stock sector correlation matrix is a real financial analysis scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: correlation values range from -0.23 to 1.00, realistic for market
+          sectors
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only numpy and plotly.figure_factory used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'saves as plot.png and plot.html (minor: both outputs correct)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses plotly's `ff.create_annotated_heatmap` and custom hover template,
+          but could leverage more interactive features like click callbacks
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/plotnine.yaml b/plots/heatmap-annotated/metadata/plotnine.yaml
index 5b3940a251..2d5b4a8343 100644
--- a/plots/heatmap-annotated/metadata/plotnine.yaml
+++ b/plots/heatmap-annotated/metadata/plotnine.yaml
@@ -27,3 +27,177 @@ review:
     default
   - Library features score is modest - implementation works but does not showcase
     unique plotnine capabilities
+  image_description: The plot displays an 8×8 correlation matrix heatmap of economic
+    indicators (GDP Growth, Inflation, Unemployment, Interest Rate, Consumer Conf,
+    Mfg Index, Export Vol, Housing). The heatmap uses a diverging RdBu (red-blue)
+    colormap with red indicating negative correlations and blue indicating positive
+    correlations. Each cell contains the correlation coefficient formatted to 2 decimal
+    places. The diagonal shows 1.00 values (self-correlation) in deep blue. Text colors
+    automatically switch between white (for dark backgrounds with |correlation| >
+    0.5) and black (for light backgrounds) for readability. The colorbar on the right
+    shows the correlation scale from -1.0 to 1.0. X-axis labels are rotated 45 degrees.
+    The title follows the required format. The plot uses a square aspect ratio with
+    white borders between cells.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (24pt), axis labels are readable (14pt with
+          rotation), annotations inside cells are visible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; x-axis labels rotated 45° prevents collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized, annotations clearly visible with proper contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu diverging palette is colorblind-safe for correlation data
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas (12×12), though could use slightly more
+          of the canvas area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels say "Variable" which is descriptive but generic; no units
+          (though N/A for correlation)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: White cell borders work well; legend well-placed but title could
+          be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: annotated heatmap'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly show variables, fill mapped to correlation values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has annotations, colorbar, contrasting text colors, proper number
+          formatting
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Correlation values range -1 to 1 as expected
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Correlation" label is accurate'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses `heatmap-annotated · plotnine · pyplots.ai` format correctly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both positive and negative correlations, diagonal 1.0 values,
+          range of strengths; could show more extreme correlations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economic indicators correlation is a perfect real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for economic correlations, though some relationships
+          could be more economically intuitive
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar well with geom_tile + geom_text, scale_fill_distiller,
+          and scale_color_identity, but doesn't leverage any unique plotnine features
+          beyond standard ggplot2 patterns
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/pygal.yaml b/plots/heatmap-annotated/metadata/pygal.yaml
index 0994e10b72..2a3575692c 100644
--- a/plots/heatmap-annotated/metadata/pygal.yaml
+++ b/plots/heatmap-annotated/metadata/pygal.yaml
@@ -29,3 +29,14 @@ review:
     support
   - The colormap only shows positive correlations (0.28 to 1.00); for a fully diverging
     colormap demo, negative correlations would better showcase the blue end
+  image_description: 'The plot displays a 6x6 correlation matrix heatmap showing relationships
+    between business metrics: Revenue, Marketing, R&D Spend, Customers, Satisfaction,
+    and Retention. The cells are colored using a diverging blue-white-red colormap
+    where blue indicates lower correlations (~0.28), white/light colors represent
+    mid-range values (~0.64), and red indicates strong positive correlations (1.00).
+    Each cell displays its correlation value with 2 decimal places. The diagonal shows
+    perfect correlations (1.00) in dark red. Row labels appear on the left, column
+    labels are rotated 45° at the bottom. A vertical colorbar on the right shows the
+    value scale from 0.28 to 1.00 labeled "Correlation". The title follows the required
+    format at the top.'
+  verdict: APPROVED
diff --git a/plots/heatmap-annotated/metadata/seaborn.yaml b/plots/heatmap-annotated/metadata/seaborn.yaml
index 85a4f5c9eb..8e6e666458 100644
--- a/plots/heatmap-annotated/metadata/seaborn.yaml
+++ b/plots/heatmap-annotated/metadata/seaborn.yaml
@@ -23,3 +23,180 @@ review:
   weaknesses:
   - Missing axis titles (e.g., Asset Class for both axes) would add context
   - Colorbar could have more intermediate tick values for precision reading
+  image_description: 'The plot displays a 7x7 correlation matrix heatmap showing relationships
+    between financial asset classes: Stocks, Bonds, Gold, Real Estate, Crypto, Commodities,
+    and Cash. The heatmap uses a diverging RdBu_r colormap with dark red indicating
+    strong positive correlations (+1.0), white/light for near-zero correlations, and
+    dark blue for strong negative correlations (-1.0). Each cell contains bold white
+    or dark text annotations showing the correlation coefficient to 2 decimal places.
+    The text color automatically contrasts with the background - white text on dark
+    cells, dark text on light cells. The diagonal shows perfect 1.00 correlations
+    (dark red). A vertical colorbar on the right shows the correlation scale from
+    -1.0 to 1.0. The title "heatmap-annotated · seaborn · pyplots.ai" appears in bold
+    at the top. X-axis labels are rotated 45 degrees for readability. Cell gridlines
+    are thin white separators.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, annotations are 16pt bold, axis labels are 16pt,
+          colorbar has clear labels - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels rotated 45° to prevent overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Square cells with optimal size, clear visual distinction between
+          correlation strengths
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r diverging colormap is colorblind-friendly, blue-white-red
+          spectrum works well
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 12x12 figure perfectly fills canvas, colorbar positioned well
+          with good margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Variable names are descriptive but no axis titles like "Asset Classes"
+          present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: White gridlines at 0.5 width are good, colorbar present but ticks
+          could include more intermediate values
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct annotated heatmap type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Rows and columns correctly mapped to variables, values to colors
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has annotations, colorbar, contrasting text colors, diverging colormap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full -1 to +1 correlation range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Correlation" with appropriate ticks
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "heatmap-annotated · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range: strong positive (0.65 Stocks-Crypto), moderate
+          (0.40, 0.35), weak (0.10, 0.05), negative (-0.25, -0.20), and perfect correlations
+          (1.00 diagonal)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial asset correlation matrix is an excellent real-world use
+          case with plausible relationships
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All correlation values are in valid -1 to +1 range with realistic
+          financial relationships
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though data is deterministic anyway)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: matplotlib.pyplot, numpy, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Leverages seaborn's sns.heatmap with annot=True, fmt=".2f", automatic
+          text contrast, cmap centering, square cells, and cbar_kws
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/altair.yaml b/plots/heatmap-basic/metadata/altair.yaml
index 5063684806..4db6251b79 100644
--- a/plots/heatmap-basic/metadata/altair.yaml
+++ b/plots/heatmap-basic/metadata/altair.yaml
@@ -26,3 +26,172 @@ review:
   - Generic row/column labels (Row A, Col 1) could be more realistic (e.g., correlation
     matrix with variable names)
   - X-axis labels appear slightly rotated which is unnecessary for short labels
+  image_description: The plot displays an 8x8 heatmap with rows labeled "Row A" through
+    "Row H" and columns labeled "Col 1" through "Col 8". The colormap uses a blue-orange
+    diverging scheme (blueorange) with blue representing low values (~8-25) and orange/brown
+    representing high values (~100-110). Each cell displays its numeric value as text
+    annotation - white text on dark cells and black text on light cells. A clear diagonal
+    pattern is visible with higher values along the main diagonal descending from
+    top-left to bottom-right. The title "heatmap-basic · altair · pyplots.ai" appears
+    at the top. A vertical colorbar legend labeled "Value" shows the scale from approximately
+    20 to 100.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 16-20pt, annotations at 18pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells well-sized, clear color differentiation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-orange diverging scheme is colorblind-safe, good choice
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight whitespace around edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Column" and "Row" are descriptive but generic (no units expected
+          for this type)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar present and well-placed, no grid needed for heatmap (cells
+          serve as grid)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap using mark_rect()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to columns/rows, value to color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Matrix format, color intensity, diverging colormap with domainMid,
+          value annotations, colorbar legend all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All values visible within scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend title "Value" is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "heatmap-basic · altair · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows diagonal pattern and value variation well, but lacks negative
+          values to fully demonstrate diverging colormap
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic row/column labels, plausible but not real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 8-110 are sensible for demonstration
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses alt.condition for text color, tooltip encoding, scale with domainMid,
+          but could use more interactive features
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/bokeh.yaml b/plots/heatmap-basic/metadata/bokeh.yaml
index 450f9720ef..4e7eb1bd24 100644
--- a/plots/heatmap-basic/metadata/bokeh.yaml
+++ b/plots/heatmap-basic/metadata/bokeh.yaml
@@ -23,3 +23,173 @@ review:
   - Axis labels could include more context (e.g., Month 2024 or Product Category)
   - Could add HoverTool for enhanced interactivity in HTML version
   - Random data does not show clear patterns/clusters typical in real sales data
+  image_description: The plot displays a 6x8 heatmap showing "Monthly Sales Performance
+    by Product Category." The y-axis shows Product A through Product F, and the x-axis
+    shows months Jan through Aug. Each cell contains a numeric value (0-100) representing
+    sales scores. The Viridis colormap is used (purple for low values ~0, teal/green
+    for mid values ~50, yellow for high values ~100). Cell annotations use adaptive
+    text colors - white text on dark purple cells, black text on light yellow/green
+    cells. A vertical colorbar on the right is labeled "Sales Score" with tick marks
+    from 0 to 100. The title "heatmap-basic · bokeh · pyplots.ai" appears in the top-left.
+    Axis labels show "Month" and "Product". Background is light gray (#f8f8f8).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and cell annotations are all clearly readable.
+          Text sizes are well-scaled for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere; cell annotations are perfectly centered.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are clearly visible with good color differentiation.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is a colorblind-safe palette with excellent perceptual uniformity.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though slight asymmetry with colorbar placement.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Month", "Product") but lack units.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar is present and functional; grid is disabled (appropriate
+          for heatmap).
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months), Y (products), values correctly mapped to color.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar legend, value annotations in cells as spec suggests.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Sales Score".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-basic · bokeh · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across cells with both high and low values; good
+          range coverage. Could show more clustering patterns.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales performance by product is a realistic, comprehensible
+          business scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-100 sales score is reasonable; 6 products × 8 months is within
+          spec range (5-50).
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save. No functions/classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, LinearColorMapper, ColorBar, LabelSet correctly.
+          Good use of Bokeh's rect glyph for heatmap. Also generates HTML for interactivity.
+          Could leverage more Bokeh-specific features like HoverTool.
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/highcharts.yaml b/plots/heatmap-basic/metadata/highcharts.yaml
index ecbdc1f672..c7b3c0e4f8 100644
--- a/plots/heatmap-basic/metadata/highcharts.yaml
+++ b/plots/heatmap-basic/metadata/highcharts.yaml
@@ -25,3 +25,172 @@ review:
   - Data only shows positive values; spec mentions diverging colormap for +/- values
     (not implemented, though not strictly required for basic)
   - Total and Avg rows/columns semantically unclear in the context of the data
+  image_description: 'The heatmap displays a 6x8 matrix of website traffic data by
+    time period (rows: Morning, Midday, Afternoon, Evening, Night, Total) and day
+    of week (columns: Mon-Sun plus Avg). Colors range from white (low values ~0) to
+    deep blue (#306998, high values ~100). Each cell contains a numeric annotation
+    showing the exact value. The colorbar legend on the right shows the scale from
+    0-100. Notable patterns include: Thursday Afternoon shows the maximum value (100),
+    while Thursday Night and Midday show lower values (12, 14). Weekend columns (Sat,
+    Sun) show generally lower values as expected. Cell borders are white, providing
+    clear separation. Title uses the correct format at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and cell annotations are clearly readable. Tick
+          labels could be slightly larger but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized, color intensity clearly distinguishes values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single-hue blue colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Day of Week" and "Time Period"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid lines (heatmap uses cell borders instead which is appropriate),
+          but legend title is missing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=days, Y=time periods, color=values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar, value annotations, meaningful patterns
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows full 0-100 range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color scale accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: heatmap-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across cells, patterns (weekends lower, midday higher),
+          but no diverging data (all positive)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic by day/time is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-100 scale for activity percentages is sensible, though "Total"
+          row semantics are unclear
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png ✓ but also outputs plot.html (acceptable)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap module, dataLabels, colorAxis, and tooltip.
+          Could leverage more advanced features like custom formatters or click handlers
+          for interactivity.
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/letsplot.yaml b/plots/heatmap-basic/metadata/letsplot.yaml
index 0a367ee3f3..d54a823040 100644
--- a/plots/heatmap-basic/metadata/letsplot.yaml
+++ b/plots/heatmap-basic/metadata/letsplot.yaml
@@ -25,3 +25,171 @@ review:
   - White text on yellow cells (mid-range values) has slightly reduced contrast
   - The data does not show any clear patterns or clustering as mentioned in the spec
     notes
+  image_description: The heatmap displays a 6×8 matrix showing department performance
+    scores by quarter. The x-axis shows 6 departments (Sales, Marketing, R&D, Operations,
+    Finance, HR) with labels rotated at 45 degrees. The y-axis shows 8 quarters (Q1-Q8)
+    from top to bottom. Each cell contains a numerical score value displayed in bold
+    white text. The color scale uses a diverging gradient from blue (low ~20-30) through
+    yellow (mid ~50-60) to red (high ~80-90), with a color bar legend on the right
+    labeled "Performance Score". The title "heatmap-basic · letsplot · pyplots.ai"
+    appears at the top. Cell values range from approximately 23.4 to 89.6.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, cell annotations are clear white
+          bold text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized with clear color differentiation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Diverging color scale is good but uses red which may be problematic
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, well-balanced layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but lack units (Score is unitless so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is clear but positioned a bit far from the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap using geom_tile
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Department, Y=Quarter, Fill=Score correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has value annotations, colorbar legend, diverging colormap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the plot area
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Performance Score" with accurate scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "heatmap-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation across departments and quarters, but no clear clustering
+          or extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department performance scores by quarter is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 15-95 are realistic for performance metrics, though some values
+          seem arbitrary
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_tile and geom_text, scale_fill_gradient2
+          for diverging colors, but nothing particularly unique to lets-plot
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/matplotlib.yaml b/plots/heatmap-basic/metadata/matplotlib.yaml
index fc81a7b343..3f9e6d5bc4 100644
--- a/plots/heatmap-basic/metadata/matplotlib.yaml
+++ b/plots/heatmap-basic/metadata/matplotlib.yaml
@@ -26,3 +26,166 @@ review:
     for more idiomatic matrix display
   - Department axis labels could include additional context (e.g., "Department" →
     "Department Correlation")
+  image_description: The plot displays an 8×8 correlation matrix heatmap showing relationships
+    between 8 company departments (Sales, Marketing, Support, Dev, HR, Finance, Ops,
+    Legal). The heatmap uses the RdBu_r diverging colormap with dark red representing
+    strong positive correlations (+1.0), dark blue representing strong negative correlations
+    (-1.0), and white/light colors representing near-zero correlations. The diagonal
+    shows perfect self-correlation (1.00) in dark red. Each cell contains its correlation
+    coefficient value (e.g., -0.30, 0.49, -1.00) with white text on dark backgrounds
+    and black text on light backgrounds for optimal readability. A vertical colorbar
+    on the right shows the scale from -1.0 to +1.0 labeled "Correlation Coefficient".
+    The title "heatmap-basic · matplotlib · pyplots.ai" appears at the top. X-axis
+    labels are rotated 45° for readability.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, tick labels 16pt, annotations 14pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell annotations clear with adaptive text color (white/black)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r is a well-established colorblind-safe diverging colormap
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square aspect ratio perfect for matrix, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels "Department" are descriptive but no units (appropriate for
+          categorical)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar well placed with label, no grid needed for heatmap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Matrix values correctly mapped to colors
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap ✓, value annotations ✓, colorbar legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full correlation range -1 to +1 displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately labeled "Correlation Coefficient"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive, negative, and zero correlations, symmetric matrix
+          with diagonal=1, but all correlations are equally distributed rather than
+          showing typical correlation patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department correlation matrix is a plausible business analytics scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly bounded to [-1, 1] correlation range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/plotly.yaml b/plots/heatmap-basic/metadata/plotly.yaml
index caff18b5a9..4857a1b0d3 100644
--- a/plots/heatmap-basic/metadata/plotly.yaml
+++ b/plots/heatmap-basic/metadata/plotly.yaml
@@ -25,3 +25,175 @@ review:
   - Colorbar title could include more context (e.g., "Monthly Sales ($K)")
   - Could leverage additional Plotly features like custom hover templates for enhanced
     interactivity
+  image_description: The plot displays a heatmap showing monthly sales data across
+    8 product categories (Electronics, Clothing, Food, Books, Sports, Home, Beauty,
+    Toys) for all 12 months (Jan-Dec). The heatmap uses a red-blue diverging colorscale
+    (RdBu_r), where deep red indicates high values (~100+ $K) and deep blue indicates
+    low values (near 0 or negative). Each cell contains a numeric annotation showing
+    the exact value. The title "heatmap-basic · plotly · pyplots.ai" is centered at
+    the top. The x-axis shows "Month" and the y-axis shows "Category". A colorbar
+    on the right indicates "Sales ($K)" with values ranging from approximately 0 to
+    100. The layout is clean with good proportions, and all text is clearly readable.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt, cell annotations
+          at 14pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell sizes are well-proportioned, annotations clearly visible within
+          cells
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r is a colorblind-safe diverging colormap with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight room for improvement in margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Month" and "Category" are descriptive but lack units (though units
+          aren''t really applicable here)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar is well-placed with title and readable ticks; no grid lines
+          (appropriate for heatmap)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (columns), Y=categories (rows), color=values - correctly
+          implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses diverging colormap ✓, value annotations ✓, colorbar legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar title "Sales ($K)" is accurate but could be more descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "heatmap-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across categories and months with seasonal patterns
+          (summer/holiday boosts); includes one negative value (-2 in Beauty/Mar)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly retail sales by category is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0-106 $K are reasonable for retail sales, though some randomness
+          leads to slight inconsistencies
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Heatmap with proper configuration, texttemplate for annotations,
+          plotly_white template; could leverage more Plotly-specific features like
+          hover customization
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/plotnine.yaml b/plots/heatmap-basic/metadata/plotnine.yaml
index 5f63c3e7c8..53af36180f 100644
--- a/plots/heatmap-basic/metadata/plotnine.yaml
+++ b/plots/heatmap-basic/metadata/plotnine.yaml
@@ -27,3 +27,178 @@ review:
     Index or similar)
   - Cell annotation text color is always black, which reduces contrast on darker blue
     cells (e.g., Region G, Q1 2023 at -45.4)
+  image_description: The heatmap displays an 8x8 matrix showing performance metrics
+    across 8 regions (A-H on the y-axis) and 8 time periods (Q1 2023 through Q4 2024
+    on the x-axis). The color scheme uses a diverging palette with Python Blue (#306998)
+    for negative values and Python Yellow (#FFD43B) for positive values, with white
+    at the midpoint (0). Each cell contains its numerical value as a text annotation.
+    The colorbar legend on the right shows the "Value" scale ranging from approximately
+    -25 to +25. The title "heatmap-basic · plotnine · pyplots.ai" appears at the top.
+    The plot clearly shows a trend pattern where earlier quarters have predominantly
+    negative values (blue) and later quarters show positive values (yellow), demonstrating
+    regional performance improvement over time.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 14-16pt, cell
+          annotations at size 12 - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels rotated 45° to avoid
+          collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized with clear white borders, annotations visible
+          on all backgrounds
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-white-yellow diverging palette is colorblind-safe (avoids red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight white space on right side near colorbar
+          but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Time Period" and "Region" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid removed for clean heatmap look, legend well placed; minor:
+          panel_grid_major=element_blank removes subtle structure'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap using geom_tile
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time periods, Y=regions, fill=value correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap for +/- values ✓, value annotations ✓, colorbar
+          legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Value" legend label correct with proper scale'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "heatmap-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values, trend patterns, regional
+          variation; could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Performance metrics by region and quarter is a real, comprehensible
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values range from ~-45 to ~+47, plausible for performance metrics
+          but units undefined
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no unnecessary functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Good use of ggplot grammar with geom_tile + geom_text layering, but
+          scale_fill_gradient2 is basic; could leverage faceting or more advanced
+          theming
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/pygal.yaml b/plots/heatmap-basic/metadata/pygal.yaml
index 71250b613f..58ea3fc51c 100644
--- a/plots/heatmap-basic/metadata/pygal.yaml
+++ b/plots/heatmap-basic/metadata/pygal.yaml
@@ -26,3 +26,173 @@ review:
     lacks native heatmap support)
   - Colorbar title Score could be more descriptive (e.g., Performance Score (%))
   - No explicit axis titles (could add Department and Quarter labels)
+  image_description: The plot displays an 8×8 matrix heatmap showing department performance
+    scores across quarters. Row labels on the left show departments (Sales, Marketing,
+    Engineering, Support, Finance, HR, Operations, R&D). Column labels at the bottom
+    show quarters (Q1 2023 through Q4 2024). Each cell contains a numeric score (58-97
+    range) with color intensity representing magnitude - lighter blues for lower scores,
+    darker blues for higher scores. A vertical colorbar on the right shows the "Score"
+    scale from 58 to 97 with a mid-value of 78. The title "heatmap-basic · pygal ·
+    pyplots.ai" appears at the top center. All text is clearly legible with good contrast,
+    and the layout uses a square 1:1 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, row/column labels, and cell values all clearly readable at
+          large font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, all elements well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized for the 8×8 grid, clear visual distinction between
+          values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight asymmetry with colorbar placement but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Row/column labels are descriptive but no explicit axis title labels
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar present and functional, white cell borders serve as grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization with matrix format
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Rows=departments, columns=quarters, color=score value
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has value annotations, colorbar legend, but no diverging colormap
+          (spec suggests for +/- data, not required here)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Score" but could be more descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across departments and time, patterns visible (R&D
+          improving, Engineering consistently high)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Performance metrics across departments and quarters is a realistic
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores in 58-97 range are plausible performance metrics
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses custom class (MatrixHeatmap) which violates KISS principle,
+          though necessary for pygal
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 2
+        max: 1
+        passed: true
+        comment: 'Saves as plot.png (bonus: also saves plot.svg and plot.html)'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative extension of pygal Graph class to create custom heatmap,
+          leverages pygal SVG rendering, style system, and HTML embedding
+  verdict: APPROVED
diff --git a/plots/heatmap-basic/metadata/seaborn.yaml b/plots/heatmap-basic/metadata/seaborn.yaml
index ef94292634..f453e74bcd 100644
--- a/plots/heatmap-basic/metadata/seaborn.yaml
+++ b/plots/heatmap-basic/metadata/seaborn.yaml
@@ -28,3 +28,175 @@ review:
   - Could use sns.clustermap or add dendrograms to showcase seaborn clustering capabilities
     as mentioned in spec notes
   - Colorbar label is set twice (in cbar_kws and manually), which is redundant code
+  image_description: 'The heatmap displays a 7×12 matrix showing monthly performance
+    scores (0-100) across 7 departments (Sales, Marketing, Engineering, Support, Finance,
+    HR, Operations) over 12 months (Jan-Dec). The plot uses a diverging RdBu (Red-Blue)
+    colormap centered at 50, where blue indicates high performance (approaching 100)
+    and red indicates low performance (approaching 0). Each cell contains a white
+    numeric annotation showing the exact score. White grid lines separate the cells.
+    The colorbar on the right is labeled "Performance Score" with a 0-100 scale. The
+    title "heatmap-basic · seaborn · pyplots.ai" appears at the top. Axis labels "Month"
+    (x-axis) and "Department" (y-axis) are clearly visible. Notable patterns include:
+    Finance row is consistently blue (high scores 74-91), Sales shows strong first
+    half (80-95) then drops, Engineering has strong second half performance (75-95
+    in Aug-Nov), and HR has a mid-year dip (11 in April).'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, annotations 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell colors and annotations are perfectly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu diverging colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format appropriate for heatmap, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Month" and "Department" but no units (appropriate
+          for categorical data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar well placed; white gridlines are effective but slightly
+          thick
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Rows=departments, Columns=months, color=values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap ✓, value annotations ✓, colorbar
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100 range shown with vmin/vmax
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Performance Score"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "heatmap-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows high/low values, consistent patterns (Finance), seasonal trends
+          (Sales, Engineering), mid-period dips (HR)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly department performance metrics is a real, comprehensible
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 0-100 with realistic distribution and patterns
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.heatmap which is core seaborn, but doesn't leverage seaborn-specific
+          features like clustermap for hierarchical clustering
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/altair.yaml b/plots/heatmap-calendar/metadata/altair.yaml
index 4d31c6d3a4..d3a85b9db0 100644
--- a/plots/heatmap-calendar/metadata/altair.yaml
+++ b/plots/heatmap-calendar/metadata/altair.yaml
@@ -25,3 +25,178 @@ review:
   - Missing dates (if any existed) would not be visually distinct from zero-value
     cells
   - Could benefit from Altair interactivity features (selection, zoom/pan)
+  image_description: The plot displays a GitHub-style calendar heatmap showing "Daily
+    Contributions 2024". The visualization uses a green sequential colormap ranging
+    from very light green (0 contributions) to dark green (15 contributions). The
+    layout shows weekdays (Mon-Sun) on the y-axis and weeks of the year on the x-axis,
+    with month labels (Jan-Dec) positioned at the top. Each day is represented as
+    a rounded rectangle cell. The legend at the bottom shows the "Contributions" scale
+    from 0 to 15. The title correctly includes the spec-id, library name, and pyplots.ai
+    branding. Weekend days (Sat, Sun) visibly show lighter colors indicating less
+    activity, while weekdays show more variation and higher contribution levels.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, weekday labels at 18pt, month labels at 20pt - all
+          clearly readable but tick labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Calendar cells are perfectly sized with rounded corners, appropriate
+          for the 365-day data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green sequential colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, though slight imbalance with empty space
+          on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has day names but no explicit label; legend provides value
+          context but no axis title
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, but no visible grid (appropriate for
+          heatmap, but legend could be better positioned)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap type with days as cells
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on calendar grid, values to color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Weekday labels on y-axis ✓, month labels at top ✓, sequential colormap
+          ✓, color legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year 2024 displayed (365 days)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Contributions" legend correctly shows 0-15 scale'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Contributions 2024 · heatmap-calendar · altair · pyplots.ai"
+          follows format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows weekday/weekend patterns well, variation across months, but
+          missing dates (neutral cells) are not distinctly shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style contribution graph is a perfect, recognizable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 0-15 contributions per day is realistic, though scale could show
+          more variation in high values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (correct, but filename is just "plot.png"
+          not in proper directory)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_rect with cornerRadius, layered charts for month labels,
+          tooltips, but could leverage more Altair-specific features like interactivity
+          or selection
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/bokeh.yaml b/plots/heatmap-calendar/metadata/bokeh.yaml
index c6d17d472a..160bf923e9 100644
--- a/plots/heatmap-calendar/metadata/bokeh.yaml
+++ b/plots/heatmap-calendar/metadata/bokeh.yaml
@@ -21,3 +21,181 @@ review:
   - White cell borders provide excellent visual separation
   weaknesses:
   - Color bar label Contributions appears slightly truncated at the canvas edge
+  image_description: The plot displays a GitHub-style calendar heatmap for 2024. It
+    shows a grid of rectangular cells arranged with weekdays (Mon-Sun) on the y-axis
+    and months (Jan-Dec) on the x-axis. Each cell represents a day, colored using
+    a sequential green palette (Greens9) where lighter greens indicate lower contribution
+    counts and darker greens indicate higher activity. The title "GitHub Contributions
+    2024 · heatmap-calendar · bokeh · pyplots.ai" appears at the top left. A color
+    bar on the right shows the "Contributions" scale from 0 to approximately 14. The
+    layout clearly shows the typical GitHub contribution pattern with lower weekend
+    activity (lighter Sat/Sun rows) and occasional bursts of high activity (dark green
+    cells). White borders separate each cell for clarity.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and tick labels are clearly readable. Font sizes
+          are appropriate for 4800x2700 canvas. Minor: title could be slightly larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere. Month labels, weekday labels, and color
+          bar are all clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Calendar cells are well-sized with appropriate spacing. The 0.9 width/height
+          with white borders creates clear separation.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green sequential palette is colorblind-safe for this use case (single
+          hue progression).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space. Plot fills majority of the area. Minor:
+          slight extra whitespace on left side before January.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Month" and "Day of Week" are clear and meaningful.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is disabled (appropriate for heatmap), but the color bar title
+          "Contributions" is partially cut off at the right edge.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap with days as cells, weeks as columns, weekdays
+          as rows.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates correctly mapped to week/weekday grid, values mapped to color
+          intensity.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: weekday labels on y-axis, month labels
+          along x-axis, sequential colormap, color scale legend.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year 2024 displayed, all values visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color bar accurately shows contribution scale.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "GitHub Contributions 2024 · heatmap-calendar ·
+          bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variation: high/low activity days, weekend patterns, occasional
+          bursts. Could show more dramatic seasonal patterns.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub contributions is a perfect, recognizable real-world scenario
+          for calendar heatmaps.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0-14 are realistic for daily commits. Poisson distribution
+          with weekend reduction is sensible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set.'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html which is fine for Bokeh.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, hover tooltips, ColorBar, LinearColorMapper.
+          Good Bokeh usage but could leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/highcharts.yaml b/plots/heatmap-calendar/metadata/highcharts.yaml
index 5832308c41..f31ac6d759 100644
--- a/plots/heatmap-calendar/metadata/highcharts.yaml
+++ b/plots/heatmap-calendar/metadata/highcharts.yaml
@@ -24,3 +24,179 @@ review:
   - Bottom portion of canvas has noticeable empty space; layout could be more balanced
   - Legend title could include a subtitle explaining the scale
   - PNG has slight aspect ratio issue (2661px instead of 2700px)
+  image_description: |-
+    The plot displays a GitHub-style calendar heatmap showing daily activity for the year 2024. The visualization uses a grid layout with:
+    - **Y-axis**: Weekday labels (Mon through Sun) on the left side
+    - **X-axis**: Month labels (Jan through Dec) along the top
+    - **Colors**: A sequential green gradient from light gray (#ebedf0) for no activity to dark green (#216e39) for high activity, mimicking GitHub contribution graphs
+    - **Title**: "Daily Activity 2024 · heatmap-calendar · highcharts · pyplots.ai" at the top center
+    - **Legend**: Vertical color scale on the right showing "Commits" with values 0-16
+    - **Layout**: Each cell represents one day, with weeks as columns and days of the week as rows. White borders separate individual cells with rounded corners.
+    - **Data pattern**: Clear weekday vs weekend pattern visible (top rows for Sat/Sun show more gray/light cells), with seasonal variation showing higher activity in spring/fall months.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend text are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels and weekday labels are
+          well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cell sizes are appropriate for the data density, each day is clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses a sequential green colormap that is colorblind-safe (single
+          hue gradient)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but there is noticeable empty space at the bottom
+          and some imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Weekday labels are descriptive but no units (not applicable for this
+          plot type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed; white cell borders serve as grid and work
+          well
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Days correctly mapped to grid positions, weeks as columns, weekdays
+          as rows
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: weekday labels, month labels, sequential
+          colormap, color legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year of data displayed (365 days)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled as "Commits" with proper scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Daily Activity 2024 · heatmap-calendar · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows weekday/weekend patterns and seasonal variation; missing days
+          handled as gray cells
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style commit activity is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Commit values 0-15 are realistic; some edge values could be more
+          varied
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Outputs plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap module with custom color stops, tooltip formatter
+          with JavaScript, but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/letsplot.yaml b/plots/heatmap-calendar/metadata/letsplot.yaml
index 7ca7f55b1d..31bcf2819f 100644
--- a/plots/heatmap-calendar/metadata/letsplot.yaml
+++ b/plots/heatmap-calendar/metadata/letsplot.yaml
@@ -24,3 +24,174 @@ review:
   - Axis labels are empty strings rather than descriptive (e.g., "Week of Year" for
     x, "Day of Week" for y)
   - Could use a more distinctive lets-plot feature like tooltips or flavor_high_contrast_dark
+  image_description: The plot displays a calendar heatmap for the year 2024 with weekday
+    labels (Mon-Sun) on the y-axis and month labels (Jan-Dec) on the x-axis. Each
+    day is represented as a rectangular tile with white borders. The color scale uses
+    a sequential gradient from light gray (#ebedf0) for zero/low values to blue (#306998)
+    for high values, with the legend labeled "Activity" showing a range of 0-30. The
+    data clearly demonstrates a GitHub-style contribution pattern with visibly reduced
+    activity on weekends (Saturday and Sunday rows are notably lighter). High-activity
+    days (darker blue tiles) are scattered throughout the year. The title correctly
+    uses the format "heatmap-calendar · letsplot · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis text at 18pt, legend text at 16-20pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized with appropriate gaps, data density is optimal
+          for 365 days
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single-hue sequential colormap (gray to blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills ~60% of space with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Empty x/y labels (uses ""), though weekday/month labels serve as
+          axis text
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid removed (appropriate for heatmap), legend well-placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap with day tiles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date components correctly mapped to x (week) and y (weekday)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Weekday labels, month labels, sequential colormap, color legend all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year (365 days) displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled "Activity" with appropriate scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-calendar · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows weekday patterns, high/low days, zero days, but missing dates
+          not explicitly shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style contribution data is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0-30 are realistic for daily contributions; a max of ~30
+          is reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_tile, scale_fill_gradient, scale_y_reverse,
+          theme customization. Good use of lets-plot but nothing exceptionally distinctive
+          beyond standard ggplot patterns.
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/matplotlib.yaml b/plots/heatmap-calendar/metadata/matplotlib.yaml
index 6f7118aff1..98a0f475c1 100644
--- a/plots/heatmap-calendar/metadata/matplotlib.yaml
+++ b/plots/heatmap-calendar/metadata/matplotlib.yaml
@@ -27,3 +27,179 @@ review:
     operations
   - Missing some edge cases in data patterns (e.g., extended no-activity periods,
     project deadline spikes)
+  image_description: The plot displays a GitHub-style calendar heatmap showing daily
+    activity (commits) for the year 2024. The layout consists of a grid with 7 rows
+    (Mon-Sun weekdays labeled on the y-axis) and 53 columns (weeks of the year). Month
+    labels (Jan-Dec) appear at the top. Each cell represents a day, colored using
+    a green sequential colormap from light gray (#ebedf0) for zero activity to dark
+    green (#196127) for high activity. The cells are separated by white borders. A
+    horizontal colorbar at the bottom shows the scale from 0.0 to ~18+ commits, labeled
+    "Daily Activity (commits)". The title "heatmap-calendar · matplotlib · pyplots.ai"
+    appears at the top in a large, readable font. The visualization clearly shows
+    weekday vs weekend patterns, with weekdays generally showing more activity (more
+    green) than weekends.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, tick labels at 16pt, colorbar label at 16pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, month labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Calendar cells well-sized with clear white borders for separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: GitHub-style green colormap is colorblind-safe (single hue sequential)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas space, but colorbar is somewhat distant from the
+          plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Weekday labels are descriptive but no units applicable; colorbar
+          has units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit grid (not needed for heatmap), colorbar is well placed
+          but shrink=0.4 makes it relatively small
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Days correctly mapped to rows (Mon-Sun), weeks to columns
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: weekday labels on y-axis, month labels
+          at top, sequential colormap, handles missing dates with neutral color, color
+          scale legend included'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year 2024 displayed correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows value range
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "heatmap-calendar · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows weekday/weekend patterns, variation in activity, some high-activity
+          days, but could have more varied patterns (e.g., vacation gaps, project
+          sprints)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style daily commit counts are a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 0-18 commits per day is realistic for developer activity
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes, follows imports → data → plot → save pattern
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, pandas, LinearSegmentedColormap)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as "plot.png" but the header says "Quality: pending" instead
+          of actual quality score'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pcolormesh effectively, LinearSegmentedColormap for custom colors,
+          proper axes manipulation. Could have used imshow with extent or other matplotlib-specific
+          features for cleaner code
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/plotly.yaml b/plots/heatmap-calendar/metadata/plotly.yaml
index 165224faeb..1f4092742c 100644
--- a/plots/heatmap-calendar/metadata/plotly.yaml
+++ b/plots/heatmap-calendar/metadata/plotly.yaml
@@ -27,3 +27,15 @@ review:
   - Could leverage more Plotly-specific features like custom hover data showing actual
     dates
   - Colorbar title could include more context
+  image_description: 'The plot displays a GitHub-style calendar heatmap for 2024 showing
+    contribution activity over the full year. The layout uses a grid with 7 rows (Mon-Sun)
+    and approximately 53 columns (weeks). Days are represented as cells with gaps
+    between them. A sequential green colorscale (from light gray #ebedf0 for zero/empty
+    to dark green #216e39 for high values) encodes contribution counts. Month labels
+    (Jan-Dec) appear along the top axis. Weekday labels (Mon-Sun) are on the left
+    y-axis. A vertical colorbar on the right shows the "Contributions" scale from
+    0-18. The title "GitHub Activity 2024 · heatmap-calendar · plotly · pyplots.ai"
+    is centered at the top in large font. The data shows realistic patterns: weekends
+    (Sat/Sun) have generally lighter activity, with occasional burst days showing
+    dark green.'
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/plotnine.yaml b/plots/heatmap-calendar/metadata/plotnine.yaml
index ebae4f7d46..08f9130074 100644
--- a/plots/heatmap-calendar/metadata/plotnine.yaml
+++ b/plots/heatmap-calendar/metadata/plotnine.yaml
@@ -26,3 +26,180 @@ review:
   - Weekday ordering shows Monday at top rather than following the spec suggested
     Sunday at top pattern
   - Legend could be positioned closer to the plot for better visual balance
+  image_description: 'The plot displays a calendar heatmap for "Daily Activity 2024"
+    with a GitHub-style contribution graph layout. The y-axis shows weekdays from
+    Sunday at the bottom to Monday at the top (Mon, Tue, Wed, Thu, Fri, Sat, Sun).
+    The x-axis displays month labels (Jan through Dec). Each day is represented as
+    a rectangular tile with color intensity indicating contribution count. The color
+    scale uses a sequential gradient from light gray (#ebedf0) for zero/low activity
+    to Python Blue (#306998) for high activity (up to 15 contributions). The legend
+    shows "Contributions" with a scale from 0 to 15. White borders separate the tiles.
+    The title reads "Daily Activity 2024 · heatmap-calendar · plotnine · pyplots.ai"
+    in bold. Clear patterns are visible: weekends (Sat/Sun) show lighter colors indicating
+    lower activity, while spring and fall months (Mar-Apr, Sep-Oct) show darker colors indicating
+    higher activity periods. Summer months (Jul-Aug) appear lighter.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels (Mon-Sun, month names) are clearly
+          readable at 18pt, legend text is 16-18pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; weekday labels and month labels are
+          well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tile sizes are appropriate, white borders provide good separation,
+          color gradient is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential light-to-dark gradient is colorblind-safe; single hue
+          progression
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas space, but there is some unused space on the right
+          side near the legend; plot could be slightly more centered
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels (weekdays, months) but no explicit unit description
+          for the y-axis (days of week is implicit)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (appropriate for heatmap), legend is well-placed but could
+          be slightly larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap with days as cells, weeks as columns
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates correctly mapped to week/weekday positions, values to color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: weekday labels on y-axis, month labels,
+          sequential colormap, color scale legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year (365 days) displayed, all months visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Contributions" legend accurately represents the data'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows format "{description} · {spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows weekday patterns, seasonal trends, sprint periods; could show
+          more variation in daily values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style contribution data is a perfect real-world scenario for
+          calendar heatmaps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Contribution counts 0-15 are realistic for daily coding activity
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_tile, scale_fill_gradient, theme_minimal),
+          but no advanced plotnine-specific features like faceting
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/pygal.yaml b/plots/heatmap-calendar/metadata/pygal.yaml
index 95cb26b995..4f3414bd0c 100644
--- a/plots/heatmap-calendar/metadata/pygal.yaml
+++ b/plots/heatmap-calendar/metadata/pygal.yaml
@@ -24,3 +24,180 @@ review:
   - Legend value ranges have overlapping boundaries (4-7 and 7-10 both include 7)
   - Code uses class/functions which violates KISS principle (though necessary for
     pygal custom chart)
+  image_description: 'The plot displays a GitHub-style calendar heatmap for the year
+    2024. The title "heatmap-calendar · pygal · pyplots.ai" is centered at the top.
+    The calendar grid shows 12 months (Jan-Dec) labeled along the top, with weekday
+    labels (Mon-Sun) on the left side. Each day is represented as a small rounded
+    rectangle cell. The color scheme uses GitHub''s signature green palette: light
+    gray (#ebedf0) for no activity, progressing through increasingly darker greens
+    (#9be9a8, #40c463, #30a14e, #216e39) for higher values. Below the grid is a color
+    legend showing "Less" to "More" with numeric ranges (0, 1-4, 4-7, 7-10, 10+).
+    Summary statistics at the bottom show "286 contributions in 366 days", "111 active
+    days · Longest streak: 3 days · Avg: 2.6 per active day". The layout is well-balanced
+    with the calendar grid taking appropriate space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, month labels, weekday labels, and legend text are all clearly
+          readable at full size. Slightly smaller than ideal but perfectly legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the visualization
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Calendar cells are well-sized and clearly visible. Color gradations
+          are distinguishable.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: GitHub's green sequential colormap is colorblind-friendly (single
+          hue variation)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins, calendar grid fills
+          appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for calendar heatmaps - no traditional axes. Month/weekday labels
+          serve this purpose.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed below the chart with clear numeric ranges
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap type with daily cells in calendar grid layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates mapped to grid positions, values mapped to colors correctly
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has weekday labels, month labels, sequential colormap, color legend.
+          Missing dates handled with gray cells.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year 2024 displayed correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend shows ranges but "4-7" and "7-10" overlap at boundary value
+          7
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "heatmap-calendar · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in daily activity, weekday patterns, seasonal trends.
+          Good mix of active/inactive days.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style contribution data is a perfect real-world scenario for
+          calendar heatmaps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0-15 are realistic for daily commits. 366 days for 2024 leap
+          year is accurate.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: true
+        comment: Uses custom class extending pygal.Graph - necessary for this chart
+          type but violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 2
+        max: 2
+        passed: true
+        comment: Saves both plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Extends pygal's Graph class to create custom chart type, leverages
+          SVG rendering capabilities, custom styling
+  verdict: APPROVED
diff --git a/plots/heatmap-calendar/metadata/seaborn.yaml b/plots/heatmap-calendar/metadata/seaborn.yaml
index 8b7f28c6a4..402c4ad421 100644
--- a/plots/heatmap-calendar/metadata/seaborn.yaml
+++ b/plots/heatmap-calendar/metadata/seaborn.yaml
@@ -21,3 +21,177 @@ review:
   weaknesses:
   - Could use seaborn figure-level functions or additional styling features to better
     showcase the library
+  image_description: The plot displays a calendar heatmap showing daily contributions
+    over the year 2024. The visualization uses a green sequential colormap ("Greens")
+    with white cell borders. Weekdays (Mon-Sun) are labeled on the y-axis, and months
+    (Jan-Dec) are labeled on the x-axis. Each cell represents a day, with color intensity
+    indicating the contribution count (0-15). The colorbar on the right shows "Daily
+    Contributions" ranging from 0 to approximately 15. The title "heatmap-calendar
+    · seaborn · pyplots.ai" is displayed at the top. The plot shows a clear pattern
+    with higher activity (darker green) on weekdays and lower activity (lighter green)
+    on weekends (Sat/Sun rows are noticeably lighter). The layout fills the canvas
+    well with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, weekday/month labels at 16pt, colorbar labels at 14-16pt
+          - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized with good visibility, white borders clearly
+          delineate each day
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green sequential colormap is colorblind-safe (single hue variation)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, colorbar properly positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis labels (x/y labels are intentionally empty, but spec doesn't
+          require units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar is well-placed and labeled; white cell borders serve as
+          grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct calendar heatmap structure with days as cells, weeks as columns
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Weekdays on y-axis, weeks/months on x-axis, values as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has weekday labels (Mon-Sun), month labels, sequential colormap,
+          color scale legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year 2024 displayed (366 days), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Daily Contributions" with accurate scale
+          0-15
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "heatmap-calendar · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation in activity, weekend vs weekday patterns, zero-activity
+          days, high-activity days
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GitHub-style contribution graph scenario, realistic daily coding
+          activity pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 0-15 are realistic for daily contributions, exponential distribution
+          with weekday boost is plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, pandas, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.heatmap which is correct but basic; could leverage seaborn's
+          statistical capabilities or additional features
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/altair.yaml b/plots/heatmap-clustered/metadata/altair.yaml
index 2807ca5c2e..63bf61cc32 100644
--- a/plots/heatmap-clustered/metadata/altair.yaml
+++ b/plots/heatmap-clustered/metadata/altair.yaml
@@ -25,3 +25,178 @@ review:
   - Colorbar legend could benefit from more prominent formatting
   - The dendrogram line thickness could be slightly reduced for visual elegance
   - Missing interactive zoom/pan features that Altair supports
+  image_description: |-
+    The plot displays a clustered heatmap showing gene expression data for 20 genes across 15 samples. The visualization features:
+    - **Title**: "heatmap-clustered · altair · pyplots.ai" centered at the top in large black text
+    - **Dendrograms**: Row dendrogram on the left showing hierarchical clustering of genes, column dendrogram at the top showing clustering of samples - both rendered as dark gray lines
+    - **Heatmap**: 20×15 grid of colored rectangles representing expression values
+    - **Color scheme**: Diverging red-blue colormap (red = high expression ~6, white = near zero, blue = low expression ~-4)
+    - **Colorbar**: Vertical legend on the right labeled "Expression" with values from -4 to 6
+    - **Axis labels**: "Genes" on y-axis, "Samples" on x-axis, both with appropriate font sizes
+    - **Gene labels**: Gene_01 through Gene_20 (reordered by clustering) on y-axis
+    - **Sample labels**: Sample_01 through Sample_15 (reordered by clustering) on x-axis, displayed at -45° angle
+    - **Visible clusters**: Clear expression patterns visible - Gene clusters (01-05, 10-15) showing high expression in certain sample groups
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is clear at 28pt, axis labels at 18pt, tick labels at 14pt
+          - all readable but tick labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, sample labels angled at -45° prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Red-blue diverging colormap is colorblind-safe, centered at zero
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good composition with dendrograms and heatmap, slight imbalance with
+          legend positioning
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Genes" and "Samples"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed and no grid is needed on a heatmap; however,
+          the colorbar could have better contrast
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Genes on rows, samples on columns, expression values as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both row and column dendrograms present, data reordered by clustering,
+          diverging colormap, colorbar legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values visible, scale appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Expression" with correct scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-clustered · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clustering patterns, gene groupings, sample groupings - could
+          show more distinct cluster boundaries
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression analysis is a perfect real-world application for
+          clustered heatmaps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Expression values in realistic range (-4 to +6), though slightly
+          wide for typical log2 fold changes
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code is script-style but slightly complex due to dendrogram coordinate
+          calculations
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas, scipy.cluster, scipy.spatial)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding and mark_rect for heatmap, mark_rule
+          for dendrograms, hconcat/vconcat for layout composition, but doesn't leverage
+          interactivity beyond tooltips
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/bokeh.yaml b/plots/heatmap-clustered/metadata/bokeh.yaml
index 2519854b77..8b828d1625 100644
--- a/plots/heatmap-clustered/metadata/bokeh.yaml
+++ b/plots/heatmap-clustered/metadata/bokeh.yaml
@@ -24,3 +24,186 @@ review:
   - Missing hover tooltips showing gene name, sample name, and exact value (key Bokeh
     interactive feature)
   - Colorbar title is rotated 90 degrees making it harder to read
+  image_description: The plot displays a clustered heatmap showing gene expression
+    data with 20 genes (rows) and 15 samples (columns). Hierarchical clustering dendrograms
+    are present on both the left side (row dendrogram) and top (column dendrogram),
+    rendered in blue (#306998). The heatmap uses a diverging RdBu colormap with red
+    representing positive expression values (up to +3.0) and blue representing negative
+    values (down to -3.0). Gene names (TNF, IL1B, CXCL8, IL6, IFNG, MYC, EGFR, ENO1,
+    PKM, GAPDH, LDHA, HK2, TP53, VEGFA, PLK1, BRCA1, BUB1, CDK1, CCNB1, AURKA) are
+    labeled on the right side. Sample names (N2_D through T2_A) are displayed at the
+    bottom at a 45-degree angle. A vertical colorbar on the far right shows "Expression
+    (z-score)" with tick marks. The title "heatmap-clustered · bokeh · pyplots.ai"
+    appears at the top center. Clear cluster structure is visible - tumor samples
+    (T prefix) cluster together on the right showing predominantly red (upregulated),
+    while normal samples (N prefix) cluster on the left showing predominantly blue
+    (downregulated).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, gene labels, sample labels, and colorbar text are all readable.
+          Sample labels at 45° angle are clear. Slightly smaller than ideal for the
+          canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Gene labels are well-spaced, sample
+          labels don't overlap.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are clearly visible with appropriate sizing. Dendrogram
+          lines are visible and well-rendered.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: RdBu diverging colormap is generally colorblind-friendly (red-blue
+          distinction). Good choice for centered data.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good layout with dendrograms, heatmap, and colorbar well-proportioned.
+          Minor: some empty space in corners.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Samples" and "Genes" axis labels present with bold styling. Colorbar
+          has "Expression (z-score)" with units.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar is well-placed. White gridlines between cells are subtle.
+          No legend needed for this plot type.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Genes on rows, samples on columns, expression values in cells
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Dendrograms on both rows and columns, reordered data, diverging colormap,
+          colorbar legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible (-3 to +3 z-scores)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly shows expression z-score values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-clustered · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear cluster structure with tumor vs normal separation. Demonstrates
+          gene groupings by function. Could show more within-cluster variation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression analysis is a canonical use case. Gene names are
+          real (CDK1, GAPDH, TP53, etc.). Sample naming (T=tumor, N=normal) is realistic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Z-scores from -3 to +3 are appropriate for expression data. Most
+          values fall within expected range.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → clustering → plotting
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses CDN resource which is not deprecated but could be simplified
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's ColumnDataSource, ColorBar, figure layouts, and multi-figure
+          composition. However, doesn't leverage Bokeh's interactive features (hover
+          tooltips would be valuable for heatmaps).
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/highcharts.yaml b/plots/heatmap-clustered/metadata/highcharts.yaml
index 7493b96d97..685c930ab2 100644
--- a/plots/heatmap-clustered/metadata/highcharts.yaml
+++ b/plots/heatmap-clustered/metadata/highcharts.yaml
@@ -29,3 +29,186 @@ review:
   - Row dendrogram lines could be slightly thicker for better visibility at the rendered
     size
   - Minor excess whitespace at bottom of the chart
+  image_description: The plot displays a clustered heatmap showing gene expression
+    data across 20 genes (y-axis) and 12 samples (x-axis). The heatmap uses a diverging
+    blue-white-red colormap where blue indicates low/negative expression values (-4
+    to -2) and red indicates high/positive expression values (2 to 4). A column dendrogram
+    is displayed above the heatmap showing hierarchical clustering of samples, which
+    groups the Drug_A, Drug_C, Drug_B, and Control samples into distinct clusters.
+    A row dendrogram is displayed on the left side showing hierarchical clustering
+    of genes. The title reads "Gene Expression Clusters · heatmap-clustered · highcharts
+    · pyplots.ai" with a subtitle "Hierarchical clustering reveals drug-specific expression
+    patterns". The colorbar legend on the right shows the "Expression" scale from
+    -4 to 4. Sample labels are rotated at the bottom, and gene labels (Gene_01 through
+    Gene_20) are displayed on the left y-axis. The layout uses a square 1:1 aspect
+    ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are clearly readable. Gene labels
+          on left side are slightly small but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Rotated x-axis labels are well spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Heatmap cells are clearly visible with good sizing. The dendrogram
+          lines are visible but could be slightly thicker for better visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-white-red diverging colormap is colorblind-safe and provides
+          excellent contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square format for clustered heatmap. Margins accommodate
+          dendrograms well, though there's slight excess whitespace at bottom.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Samples" and "Genes" are descriptive axis titles.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar is well-placed with "Expression" title. White cell borders
+          provide visual separation. No grid needed for heatmap.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Genes on rows, samples on columns, correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has dendrograms on both rows and columns, reordered data based on
+          clustering, diverging colormap, colorbar legend.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Expression values centered around zero, scale from -4 to 4 visible
+          in colorbar.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Expression".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "heatmap-clustered · highcharts · pyplots.ai"
+          in title.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear cluster structure with genes responding differently to
+          Drug_A, Drug_B, Drug_C, and Control. Multiple gene groups visible. Could
+          have slightly more variation in cluster patterns.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression microarray experiment is an excellent, realistic
+          application for clustered heatmaps.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Expression values in reasonable range (-4 to +4 log-fold change).
+          Sample and gene counts (12 samples, 20 genes) are appropriate for the visualization.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly follows KISS pattern but includes a helper function `get_dendro_paths()`
+          which adds complexity. Understandable given the need for custom dendrogram
+          SVG generation.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducible data generation.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts and scipy APIs.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap module with custom SVG overlay for dendrograms.
+          Creative solution, but dendrograms are not a native Highcharts feature - they
+          are implemented via a custom SVG overlay, which works but is a workaround.
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/letsplot.yaml b/plots/heatmap-clustered/metadata/letsplot.yaml
index 244e0a7fc0..947d246ab3 100644
--- a/plots/heatmap-clustered/metadata/letsplot.yaml
+++ b/plots/heatmap-clustered/metadata/letsplot.yaml
@@ -28,3 +28,180 @@ review:
     full resolution
   - Legend is positioned somewhat far from the main heatmap due to ggbunch layout
     constraints
+  image_description: The plot displays a clustered heatmap with gene expression data
+    (20 genes × 15 samples). A column dendrogram at the top shows the hierarchical
+    clustering of samples, with clear branching structure. A row dendrogram on the
+    left shows the hierarchical clustering of genes. The heatmap uses a diverging
+    red-white-blue colormap (RdBu) where red indicates high expression (up to ~4),
+    white indicates neutral (0), and blue indicates low expression (down to ~-4).
+    Gene labels (Gene_01 through Gene_20) appear on the y-axis, and sample labels
+    (Sample_A through Sample_O) are shown on the x-axis rotated at 45 degrees. The
+    title "heatmap-clustered · letsplot · pyplots.ai" is displayed at the top. A legend
+    on the right shows "Expression (z-score)" with the color scale. The clustering
+    reveals clear patterns with distinct gene groups showing correlated expression
+    patterns across sample clusters.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is bold and readable at 24pt, axis labels readable, gene labels
+          at 10pt are slightly small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels rotated appropriately
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Heatmap tiles clearly visible with good spacing (0.95 width/height),
+          colors distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging blue-white-red colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ggbunch for layout, dendrograms properly positioned;
+          slight margin imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Samples" and "Genes" descriptive labels'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid disabled as appropriate for heatmap, legend positioned well
+          but slightly far from plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Genes on rows, samples on columns, expression as fill
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has dendrograms on both axes, reordered by clustering, diverging
+          colormap, colorbar legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within heatmap bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Expression (z-score)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: `heatmap-clustered · letsplot · pyplots.ai`'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear cluster structure with 3 gene groups and 3 sample groups,
+          demonstrates clustering well; could show more variation in cluster sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression analysis is a perfect real-world scenario for clustered
+          heatmaps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Z-score range of approximately -4 to 4 is realistic for expression
+          data
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains a helper function `get_dendrogram_segments()` which violates
+          KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creative use of `ggbunch` for composite layout, `geom_tile` for heatmap,
+          `geom_segment` for dendrograms, `scale_fill_gradient2` for diverging colors;
+          demonstrates advanced lets-plot composition
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/matplotlib.yaml b/plots/heatmap-clustered/metadata/matplotlib.yaml
index aacc103bcb..031dcf1d14 100644
--- a/plots/heatmap-clustered/metadata/matplotlib.yaml
+++ b/plots/heatmap-clustered/metadata/matplotlib.yaml
@@ -27,3 +27,178 @@ review:
     types as suggested in spec
   - Tick label fontsize (14pt) slightly below the recommended 16pt for optimal legibility
     at full resolution
+  image_description: The plot displays a clustered heatmap showing gene expression
+    data across 15 genes and 12 samples. The title "heatmap-clustered · matplotlib
+    · pyplots.ai" appears at the top in clear, readable font. A row dendrogram on
+    the left shows hierarchical clustering of genes (IL6, TNF, IFNG, IL1B, CXCL8 grouped
+    together as immune genes; CCNB1, AURKA, BUB1, CDK1, PLK1 clustered as cell cycle
+    genes; LDHA, HK2, PKM, GAPDH, ENO1 clustered as metabolism genes). A column dendrogram
+    on top shows sample clustering with normal samples (N1_B, N1_A, N1_C, N2_B, N2_A,
+    N2_C) on the left and tumor samples (T2_A, T2_B, T2_C, T1_C, T1_A, T1_B) on the
+    right. The heatmap uses a diverging RdBu_r colormap with red indicating high expression
+    (~+2.5 z-score) and blue indicating low expression (~-2.5 z-score). A colorbar
+    on the right is labeled "Expression (z-score)". The clustering clearly reveals
+    the tumor vs. normal distinction and gene functional groupings.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; the title is 24pt, while tick labels at 14pt
+          are slightly below the recommended 16pt but still clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; X-axis labels rotated 45° to prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells clearly visible with good contrast between values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r is an excellent diverging colormap, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with gridspec; slight imbalance with empty corner space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Gene and sample names are descriptive but no explicit axis titles
+          (x/y labels)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Colorbar well placed; no grid needed for heatmap; minor: colorbar
+          could be more prominent'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: clustered heatmap with dendrograms'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Matrix correctly mapped to heatmap cells
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: dendrograms on both axes, reordered rows/columns,
+          diverging colormap, colorbar'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric color scale around zero shows all data properly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled with z-score
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-clustered · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear cluster structure with multiple gene clusters and sample
+          groups; could benefit from more diverse expression patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent: real gene names (CDK1, GAPDH, IL6, etc.) with tumor vs.
+          normal comparison is a classic bioinformatics use case'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Z-scores in reasonable range (-2.5 to +2.5); slightly engineered
+          block patterns
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → clustering → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, scipy.cluster.hierarchy, scipy.spatial.distance)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib and scipy APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct settings
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's gridspec for complex layout and scipy for clustering;
+          could leverage matplotlib's more advanced features like color bars for row/column
+          annotations
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/plotly.yaml b/plots/heatmap-clustered/metadata/plotly.yaml
index 43eb4950b0..7ee8ae33a7 100644
--- a/plots/heatmap-clustered/metadata/plotly.yaml
+++ b/plots/heatmap-clustered/metadata/plotly.yaml
@@ -27,3 +27,175 @@ review:
     labels are present
   - No row/column color bars for group annotations as suggested in spec Notes (could
     annotate gene pathways or tumor/normal status)
+  image_description: |-
+    The plot shows a clustered heatmap displaying gene expression data for 20 genes across 12 samples. The visualization features:
+    - **Dendrograms**: Blue hierarchical clustering trees on both the top (column clustering) and left (row clustering) sides
+    - **Heatmap**: A well-proportioned central heatmap using the RdBu_r diverging colormap (red=high expression ~3, blue=low expression ~-3)
+    - **Title**: Correctly formatted as "heatmap-clustered · plotly · pyplots.ai" centered at top
+    - **Gene labels** (rows): PLK1, CDK1, MYC, BUB1, AURKA, CCNB1, VEGFA, EGFR, BRCA1, TP53, PKM, HK2, LDHA, ENO1, GAPDH, CXCL8, IL1B, IFNG, TNF, IL6
+    - **Sample labels** (columns): N2_C, N2_A, N2_B, N1_C, N1_A, N1_B, T1_C, T1_A, T1_B, T2_C, T2_A, T2_B (rotated 45°)
+    - **Colorbar**: On the right with "Expression (z-score)" title, range -3 to 3
+    - **Clear clustering**: Normal samples (N) cluster together on the left, Tumor samples (T) on the right, demonstrating successful hierarchical clustering
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title is large and clear, gene/sample labels readable at full size,
+          colorbar text legible. Minor: could be slightly larger for optimal readability'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells clearly visible, dendrograms well-rendered with appropriate
+          line width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r diverging colormap is colorblind-safe for this use case
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good proportions, plot fills canvas well. Minor: slight imbalance
+          with left dendrogram area'
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No explicit axis labels (X/Y titles), only tick labels present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar well-positioned, no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Genes on rows, samples on columns, correctly reordered
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has dendrograms on both axes, reordering, diverging colormap centered
+          at zero, colorbar legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate z-score range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: Colorbar shows values but lacks row/column group annotations mentioned
+          in spec Notes
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-clustered · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear cluster structure, tumor vs normal separation, but no
+          explicit group color bars for pathway annotation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression analysis with real gene names (CDK1, TP53, etc.)
+          and tumor/normal samples - excellent realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Z-score normalized expression values in typical -3 to 3 range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → clustering → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, plotly.graph_objects, make_subplots,
+          scipy clustering)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotly API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses make_subplots for multi-panel layout, go.Heatmap with hover
+          templates, colorbar customization. Could leverage more interactivity features.
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/plotnine.yaml b/plots/heatmap-clustered/metadata/plotnine.yaml
index 961c991568..0ea91cee1e 100644
--- a/plots/heatmap-clustered/metadata/plotnine.yaml
+++ b/plots/heatmap-clustered/metadata/plotnine.yaml
@@ -24,3 +24,175 @@ review:
   - Sample labels at bottom could be slightly larger for better readability at full
     resolution
   - Code is necessarily complex due to manual dendrogram coordinate calculation
+  image_description: |-
+    The plot displays a clustered heatmap showing gene expression data for 12 genes (rows) across 8 samples (columns). The visualization features:
+    - **Color scheme**: Diverging palette using Python Blue (#306998) for low expression, white for zero, and Python Yellow (#FFD43B) for high expression
+    - **Dendrograms**: Row dendrogram on the left showing hierarchical gene clustering; column dendrogram on top showing sample clustering
+    - **Labels**: Gene names (Gene1-Gene12) on the right side; sample names (SampleA-SampleH) at the bottom, rotated 45°
+    - **Title**: "heatmap-clustered · plotnine · pyplots.ai" centered at top
+    - **Legend**: Colorbar labeled "Expression Level" showing scale from -2 to 2
+    - **Clustering patterns**: Clear groupings visible - genes 1-4 cluster together (high in samples A-D), genes 5-8 form another cluster (opposite pattern), genes 9-12 show mixed patterns
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title is large and clear, gene/sample labels readable, legend text
+          clear. Minor: sample labels slightly small.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap tiles well-sized, dendrograms clearly visible with appropriate
+          line thickness
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-white-yellow diverging scheme is colorblind-friendly, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space, dendrograms proportional to heatmap
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: No traditional axis labels (appropriate for heatmap), but gene/sample
+          names serve this purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed; no grid needed for heatmap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Matrix values correctly mapped to colors, rows/columns properly labeled
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both row and column dendrograms present, reordering applied, diverging
+          colormap centered at zero, colorbar legend included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible in heatmap
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows expression level scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-clustered · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear clustering patterns with distinct gene groups, mixed
+          expression genes. Could have included more dramatic outliers.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression analysis is a perfect real-world application for
+          clustered heatmaps
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Expression values -2.5 to 3.0 reasonable for standardized gene expression
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code is linear but lengthy due to manual dendrogram construction;
+          necessary complexity for plotnine
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics with geom_tile, geom_segment,
+          geom_text, and scale_fill_gradient2. Manual dendrogram construction is necessary
+          since plotnine lacks native clustering support.
+  verdict: APPROVED
diff --git a/plots/heatmap-clustered/metadata/seaborn.yaml b/plots/heatmap-clustered/metadata/seaborn.yaml
index 4566de4913..5b8a6eb468 100644
--- a/plots/heatmap-clustered/metadata/seaborn.yaml
+++ b/plots/heatmap-clustered/metadata/seaborn.yaml
@@ -28,3 +28,180 @@ review:
     creating visual clutter
   - The two separate legends (Gene Group and Condition) could potentially be better
     positioned
+  image_description: 'The plot displays a clustered heatmap showing gene expression
+    data with hierarchical clustering dendrograms on both rows (left side) and columns
+    (top). The heatmap uses a diverging RdBu_r colormap ranging from -3 (blue) to
+    +3 (red), centered at 0 (white). There are 30 genes (rows) and 20 samples (columns).
+    Row annotations show three gene groups: Immune (blue), Metabolic (yellow), and
+    Signaling (green). Column annotations show two conditions: Control (pink/coral)
+    and Treatment (light blue). The title "heatmap-clustered · seaborn · pyplots.ai"
+    appears at the top. A colorbar labeled "Expression (z-score)" is positioned on
+    the left. Two legends appear on the right side - one for Gene Group and one for
+    Condition. The clustering has successfully grouped Treatment samples (T12-T19)
+    together on the left and Control samples (C01-C10) on the right, and genes are
+    reorganized showing distinct expression patterns.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt is clear, axis labels at 20pt readable, tick labels
+          at 12pt legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text elements are well separated with rotated x-axis labels
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells clearly visible with white gridlines, optimal sizing
+          for 30x20 matrix
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r is a standard colorblind-safe diverging colormap
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but the two legends on the right side create slight imbalance;
+          Condition legend overlaps with Genes y-axis label
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Samples" and "Genes" are descriptive labels'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: White gridlines work well; legends are functional but Condition legend
+          placement partially overlaps with y-axis label
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct clustered heatmap with dendrograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Matrix properly displayed with genes on rows, samples on columns
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Dendrograms on both axes, row/column color annotations, diverging
+          colormap, colorbar present, Ward's method used
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within -3 to +3 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Both Gene Group and Condition legends correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows distinct gene clusters with different expression patterns,
+          demonstrates hierarchical relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression analysis is a perfect real-world application; Control
+          vs Treatment experimental design is standard
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Z-score values from -3 to +3 are realistic for normalized expression
+          data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, numpy, pandas, seaborn,
+          Patch)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of sns.clustermap() which is seaborn's signature function
+          for this plot type; includes row_colors, col_colors, Ward linkage, dendrogram_ratio
+          customization
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/altair.yaml b/plots/heatmap-correlation/metadata/altair.yaml
index 7e7d65c074..f82423471d 100644
--- a/plots/heatmap-correlation/metadata/altair.yaml
+++ b/plots/heatmap-correlation/metadata/altair.yaml
@@ -25,3 +25,176 @@ review:
   - Triangle masking not implemented (spec mentions "consider masking upper or lower
     triangle")
   - Could add tooltips for interactivity since HTML is also saved
+  image_description: The plot displays an 8×8 correlation matrix heatmap showing financial
+    metrics (Revenue, Profit, Expenses, Employees, Market Cap, Debt, Assets, R&D Spend).
+    The color scheme uses a diverging blue-white-red palette where dark red represents
+    strong positive correlations (1.0), dark blue represents strong negative correlations
+    (-1.0), and white/light colors represent values near zero. Each cell is annotated
+    with the correlation coefficient to 2 decimal places. The title "heatmap-correlation
+    · altair · pyplots.ai" appears at the top. Variable names are displayed on both
+    axes with bold text. A vertical colorbar legend labeled "Correlation" appears
+    on the right side ranging from -1.0 to 1.0. The diagonal shows perfect correlations
+    (1.00) in dark red. The layout is square and well-balanced with white borders
+    between cells.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold at 28pt, axis labels are 15pt bold, cell
+          annotations are 14pt bold, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels and annotations are well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are appropriately sized, white borders provide clear separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Red-blue diverging colormap is colorblind-friendly (not pure red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good square layout but plot could fill slightly more canvas area
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis titles provided (just variable names, no descriptive labels
+          like "Variables")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well placed with appropriate sizing, white cell borders serve
+          as grid
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct correlation matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to both axes
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has diverging colormap, annotations, symmetric matrix; missing optional
+          triangle masking
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Colorbar correctly fixed to -1 to 1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend clearly shows Correlation scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-correlation · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows positive correlations (Profit-Market Cap: 0.66), negative
+          correlations (Assets-Profit: -0.67), near-zero values (Employees-Market
+          Cap: 0.01), diagonal 1.0s'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial metrics are a realistic scenario for correlation analysis
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All correlation values are in valid -1 to 1 range with sensible relationships
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative grammar with mark_rect and mark_text layering,
+          conditional color encoding for text visibility, but could leverage more
+          interactive features
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/bokeh.yaml b/plots/heatmap-correlation/metadata/bokeh.yaml
index 1d0965f964..69361a08ac 100644
--- a/plots/heatmap-correlation/metadata/bokeh.yaml
+++ b/plots/heatmap-correlation/metadata/bokeh.yaml
@@ -25,3 +25,179 @@ review:
     Indicators" on both axes)
   - Could leverage more Bokeh-specific features like CustomJS callbacks or interactive
     filtering
+  image_description: The plot displays a correlation matrix heatmap for 8 economic
+    indicators (GDP, Unemployment, Inflation, Interest Rate, Stock Index, Consumer
+    Conf., Housing, Exports). Only the lower triangle is shown (with diagonal), which
+    is a good practice for correlation matrices. The heatmap uses a diverging blue-white-red
+    color scheme (RdBu) where dark blue indicates strong positive correlations (+1.0),
+    white indicates near-zero correlations, and red indicates negative correlations.
+    Each cell contains the correlation coefficient value (2 decimal places) with white
+    text on dark backgrounds and dark text on light backgrounds for readability. The
+    title "heatmap-correlation · bokeh · pyplots.ai" is displayed at the top. A color
+    bar on the right shows the correlation scale from -1 to 1. X-axis labels are rotated
+    at 45 degrees. Both axes are labeled "Economic Indicators".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and cell annotations are all clearly readable
+          at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels rotated to prevent overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized, correlation values clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu diverging palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of square aspect ratio, but plot could use slightly more
+          canvas area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Economic Indicators") but no units (correlations
+          don't have units though)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Color bar is well placed, but grid not applicable (disabled correctly)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: correlation matrix heatmap'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly assigned to both axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diverging colormap centered at zero,
+          cell annotations, symmetric matrix with triangle masking, colorbar with
+          -1 to 1 range'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Colorbar shows full -1 to 1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color bar labeled "Correlation" with correct scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows strong positive (0.85), strong negative (-0.82), weak correlations,
+          and diagonal (1.00). Good range of values demonstrated
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economic indicators with plausible correlation relationships (e.g.,
+          GDP-Unemployment negative, GDP-Stock Index positive)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for economic correlations, though some relationships
+          might be slightly exaggerated
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct); figure size is 3600x3600, which
+          is one of the recommended sizes (4800x2700 or 3600x3600) and is acceptable
+          per quality criteria
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses HoverTool for interactivity, ColumnDataSource, LabelSet. Generates
+          both PNG and HTML output. Could have used more advanced Bokeh features.
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/highcharts.yaml b/plots/heatmap-correlation/metadata/highcharts.yaml
index 7b734e47f5..50af8c88b5 100644
--- a/plots/heatmap-correlation/metadata/highcharts.yaml
+++ b/plots/heatmap-correlation/metadata/highcharts.yaml
@@ -24,3 +24,178 @@ review:
   - Color bar could benefit from a title label (e.g., Correlation)
   - Variable names on axes lack context (could optionally include units or fuller
     descriptions)
+  image_description: The plot displays a correlation matrix heatmap for 7 financial
+    metrics (Revenue, Profit, Expenses, Growth, ROI, Debt, Assets). It uses a lower-triangle
+    format showing only the unique correlations plus diagonal. The color scheme is
+    a diverging blue-white-orange palette, where blue represents negative correlations,
+    white represents zero, and orange represents positive correlations. Each cell
+    is annotated with the correlation coefficient to 2 decimal places. The title follows
+    the required format "heatmap-correlation · highcharts · pyplots.ai" with a subtitle
+    "Financial Metrics Correlation Matrix". Y-axis labels are horizontal and readable,
+    while X-axis labels are rotated at approximately 315 degrees. A vertical color
+    bar on the right shows the scale from -1 to 1 with tick marks at 0.5 intervals.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title is prominent, axis labels are
+          well-sized, cell annotations are bold and legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, cell annotations are well-centered
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Cells are appropriately sized, colors are distinct; minor deduction
+          as some mid-range values have similar shades
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue-orange diverging palette (no red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square 3600x3600 canvas appropriate for matrix; slight
+          excess margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Variable names are descriptive but no axis titles (acceptable for
+          correlation matrix)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Color bar is well-placed but could use a title label
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization for correlation matrix
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to both axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diverging colormap centered at 0, cell
+          annotations with 2 decimals, symmetric handling via lower triangle'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Color axis correctly fixed to -1 to 1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color bar accurately represents correlation scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: false
+        comment: Format correct "heatmap-correlation · highcharts · pyplots.ai", but
+          subtitle adds extra text (minor)
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range of correlations: strong positive (0.89), strong
+          negative (-0.58), weak (0.15), and moderate values'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial metrics correlation is a genuine real-world use case (e.g.,
+          Profit-ROI high correlation, Debt-ROI negative correlation are realistic)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values are realistic; minor note that some correlation values may
+          be slightly idealized
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports are used; however, the numpy random seed is set although
+          base_corr is deterministic
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Python API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highcharts heatmap module, custom tooltips with JavaScript
+          formatter, colorAxis with stops, data labels with formatters
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/letsplot.yaml b/plots/heatmap-correlation/metadata/letsplot.yaml
index a0b07132e8..c72d01cd7f 100644
--- a/plots/heatmap-correlation/metadata/letsplot.yaml
+++ b/plots/heatmap-correlation/metadata/letsplot.yaml
@@ -23,3 +23,180 @@ review:
   - No upper/lower triangle masking (spec mentions this as optional but would enhance)
   - Axis labels are generic "Financial Metric" rather than more descriptive
   - Legend could be slightly more prominent
+  image_description: The plot displays an 8×8 correlation matrix heatmap for financial
+    metrics. The heatmap uses a diverging color scheme with blue (#2166AC) for negative
+    correlations, white for zero, and red (#B2182B) for positive correlations. Each
+    cell contains bold black text showing the correlation coefficient to 2 decimal
+    places. The diagonal shows 1.00 values (self-correlation) in deep red. Variables
+    include Revenue, Marketing, Employees, Satisfaction, Profit, Market Share, Innovation,
+    and Debt Ratio. The x-axis labels are rotated 45 degrees. A colorbar legend labeled
+    "Correlation" appears on the right side with range -1 to 1. The title "heatmap-correlation
+    · letsplot · pyplots.ai" is prominently displayed at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (28pt), axis labels are 22pt, tick labels
+          16pt, cell values 14pt bold - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels rotated 45° prevent overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tile sizes are well-proportioned, correlation values clearly visible
+          against backgrounds
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-white-red diverging scheme is colorblind-friendly (uses blue/red
+          not green/red)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square format is appropriate for correlation matrix; some whitespace
+          on right due to legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels say "Financial Metric" but lack specific units (not applicable
+          for correlation though)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: White borders between tiles work well; legend is well-placed but
+          could be slightly larger
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization for correlation matrix
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to both axes, correlations to fill color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap centered at 0 ✓, annotations with 2 decimals ✓,
+          symmetric matrix ✓, colorbar range -1 to 1
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 8 variables visible, full -1 to 1 range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Correlation" with accurate scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "heatmap-correlation · letsplot · pyplots.ai"
+          but missing capitalization consistency
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range of correlations: strong positive (0.78 Revenue-Marketing),
+          strong negative (-0.45 Profit-Debt Ratio), near-zero (Innovation vs others),
+          and perfect diagonal'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial metrics scenario is realistic and coherent (profit negatively
+          correlates with debt ratio, revenue positively with employees)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All correlation values are in valid -1 to 1 range with realistic
+          magnitudes
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs both plot.png and plot.html (extra file, minor issue)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with geom_tile, scale_fill_gradient2,
+          coord_fixed; includes interactive tooltips in HTML output. Could have used
+          more advanced lets-plot features like ggtb() for tooltip builder or faceting
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/matplotlib.yaml b/plots/heatmap-correlation/metadata/matplotlib.yaml
index 62408a381c..205fb85f9d 100644
--- a/plots/heatmap-correlation/metadata/matplotlib.yaml
+++ b/plots/heatmap-correlation/metadata/matplotlib.yaml
@@ -26,3 +26,178 @@ review:
     more informative or omitted on one axis
   - Font size of 14pt for cell annotations is adequate but could be slightly larger
     (16pt) for better readability at this image size
+  image_description: 'The plot displays a lower-triangular correlation matrix heatmap
+    showing relationships between 8 economic indicators: GDP Growth, Inflation, Unemployment,
+    Interest Rate, Stock Index, Consumer Conf., Industrial Prod., and Trade Balance.
+    The colormap is RdBu_r (red-blue diverging) with red indicating positive correlations
+    and blue indicating negative correlations. The colorbar on the right ranges from
+    -1.00 to 1.00. Each cell is annotated with correlation coefficients to 2 decimal
+    places (e.g., -0.89, 0.72, 0.81). The diagonal shows 1.00 (perfect self-correlation).
+    Text colors adapt based on background intensity - white text on dark cells, black
+    text on light cells. The title reads "heatmap-correlation · matplotlib · pyplots.ai"
+    in bold at the top. X-axis labels are rotated 45 degrees for readability. The
+    layout is square (12x12) with clean margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, tick labels 16pt, annotations 14pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are well-sized, annotations clearly visible with adaptive text
+          colors
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r diverging colormap is colorblind-friendly for correlation
+          matrices
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format perfect for symmetric matrix; plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Economic Indicators") but no units (appropriate
+          for correlation)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar well-placed; no grid needed for heatmap (cells serve as
+          grid)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap visualization for correlation matrix
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables on both axes, correlation values in cells
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap centered at zero ✓, annotations with 2 decimals
+          ✓, symmetric matrix ✓, upper triangle masked ✓, colorbar range -1 to 1
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All correlations visible from -0.91 to 1.00
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Correlation Coefficient"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "heatmap-correlation · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows strong positive (0.81), strong negative (-0.91), weak correlations,
+          and near-zero values; excellent range coverage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economic indicators with plausible correlations (GDP-Unemployment
+          inverse, Inflation-Interest positive)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All correlations within valid -1 to 1 range with realistic magnitudes
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though data is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' (correct filename but no path verification)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses imshow with colormap, colorbar customization, text annotations
+          with conditional coloring. Could use matplotlib's built-in heatmap styling
+          more.
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/plotly.yaml b/plots/heatmap-correlation/metadata/plotly.yaml
index 6cb523da0e..ff78d509b4 100644
--- a/plots/heatmap-correlation/metadata/plotly.yaml
+++ b/plots/heatmap-correlation/metadata/plotly.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - X-axis labels are rotated 45 degrees which works but could be cleaner with horizontal
     orientation for this number of variables
+  image_description: The plot displays a lower-triangular correlation matrix heatmap
+    with 8 financial metrics (Revenue, Profit, Expenses, Assets, Debt, ROI, Growth,
+    Risk). The colorscale uses a red-white-blue diverging scheme (RdBu_r) where dark
+    red indicates strong positive correlations (e.g., Revenue-Profit at 0.93), white
+    indicates near-zero correlations, and blue indicates negative correlations (e.g.,
+    ROI-Risk at -0.41). Each cell shows the correlation coefficient to 2 decimal places
+    with dynamic text coloring (white text on dark backgrounds, black on light). The
+    diagonal shows 1.00 values. Variable names appear on both axes with the x-axis
+    labels rotated 45 degrees. A colorbar on the right labeled "Pearson Correlation"
+    shows the scale from -1 to 1. The title correctly follows the format "heatmap-correlation
+    · plotly · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 32pt, axis titles 24pt, tick labels 18pt, annotations 18pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells well-sized, correlation values clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu diverging colorscale is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but slight imbalance with empty upper triangle space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Financial Metrics" on both axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No explicit grid (appropriate for heatmap), colorbar well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap correlation matrix
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly on both axes, symmetric matrix
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap centered at zero ✓, annotations with 2 decimal
+          places ✓, lower triangle masking ✓, colorbar range -1 to 1
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Colorbar shows full -1 to 1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Pearson Correlation" with clear tick marks
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "heatmap-correlation · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range: strong positive (0.93), moderate positive (0.44),
+          weak (0.07), negative correlations (-0.41, -0.20)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial metrics correlation is a real, comprehensible scenario
+          (Revenue/Profit highly correlated, Risk negatively correlated with ROI)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Correlation values realistic but some relationships (Expenses~Revenue
+          0.73) might be stronger than typical
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png; comment mentions 4800x2700, which matches the actual
+          output size of 4800x2700 (1600x900 * scale=3)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Good use of Plotly features: custom hover templates with correlation
+          strength interpretation, HTML export for interactivity, but could leverage
+          more interactive features like click events'
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/plotnine.yaml b/plots/heatmap-correlation/metadata/plotnine.yaml
index 5a419152e7..ae6e39966a 100644
--- a/plots/heatmap-correlation/metadata/plotnine.yaml
+++ b/plots/heatmap-correlation/metadata/plotnine.yaml
@@ -25,3 +25,181 @@ review:
   - Legend could be positioned slightly closer to the heatmap to reduce whitespace
   - Data does not include any extreme negative correlations (below -0.35) to show
     the full range of the diverging colormap blue end
+  image_description: 'The plot displays a lower-triangular correlation matrix heatmap
+    for 8 portfolio assets (Stock_A, Stock_B, Stock_C, Bonds, Gold, Real_Estate, Oil,
+    Tech_Index). The heatmap uses a diverging color scheme: deep blue (#2166AC) for
+    negative correlations, white for zero correlation, and deep red (#B2182B) for
+    positive correlations. Each cell is annotated with the correlation coefficient
+    to 2 decimal places in black text. The colorbar on the right shows "Correlation
+    Coefficient" ranging from -1.0 to 1.0. The title "heatmap-correlation · plotnine
+    · pyplots.ai" is bold and centered at the top. X-axis labels are rotated 45 degrees
+    for readability. The plot uses a square 1:1 aspect ratio with coord_fixed, and
+    grid lines are removed for cleaner visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 26pt, axis titles 22pt, tick labels 16pt, cell annotations
+          14pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; x-axis labels rotated 45° to prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tiles are well-sized, annotations clearly visible, good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-white-red diverging scheme is colorblind-safe (RdBu variant)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of 12×12 square canvas; slight whitespace on right due to
+          legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive "Portfolio Asset" labels but no units (correlation matrices
+          don't have units, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid removed (appropriate for heatmap); legend well-placed but could
+          be slightly closer
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap for correlation matrix
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to both axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap ✓, centered at zero ✓, annotations with 2 decimals
+          ✓, lower triangle to reduce redundancy ✓, colorbar range -1 to 1 ✓
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All correlations displayed correctly (-1 to 1)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Correlation Coefficient"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "heatmap-correlation · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows positive correlations (stocks), negative correlations (bonds
+          vs stocks), near-zero correlations (gold), and diagonal (1.00). Minor: could
+          include one extreme negative correlation (-0.8 or lower) for a fuller range'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial portfolio correlation is a perfect real-world scenario
+          for this plot type
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for financial assets; correlations range from
+          -0.35 to 0.91 which is plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed since data is
+          hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png ✓ but uses separate width/height in save() instead
+          of relying solely on figure_size
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Uses plotnine grammar of graphics well: geom_tile + geom_text +
+          scale_fill_gradient2 + coord_fixed + theme customization. Could leverage
+          more plotnine-specific features like faceting or position adjustments, but
+          grammar usage is appropriate for this plot type.'
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/pygal.yaml b/plots/heatmap-correlation/metadata/pygal.yaml
index f0595dcfef..4f7331cb99 100644
--- a/plots/heatmap-correlation/metadata/pygal.yaml
+++ b/plots/heatmap-correlation/metadata/pygal.yaml
@@ -24,3 +24,187 @@ review:
     lack of native heatmap support
   - Slight excess white space between title and matrix could be reduced
   - Colorbar placement could be slightly closer to the main matrix
+  image_description: The plot displays an 8×8 correlation matrix heatmap for financial/business
+    metrics (Revenue, Profit, Customers, Marketing, R&D Spend, Employees, Market Share,
+    Stock Price). The diverging color scheme goes from blue (negative correlations)
+    through white (zero) to red (positive correlations), centered at zero. Each cell
+    is annotated with correlation values to 2 decimal places. The title "heatmap-correlation
+    · pygal · pyplots.ai" is at the top. Row labels are on the left side, column labels
+    are rotated 45° at the bottom with axis title "Business Metrics" below them. A
+    vertical colorbar on the right shows the gradient from +1.00 to -1.00 with "Correlation"
+    label. The diagonal shows perfect correlations (1.00) in dark red. Strong positive
+    correlations visible between Revenue-Profit (0.82), Profit-Stock Price (0.85).
+    Negative correlations shown in blue like Profit-R&D Spend (-0.22). The layout
+    is clean on a square canvas with good white margins.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title large and prominent, row/column
+          labels well-sized, correlation values legible within cells, colorbar labels
+          visible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, column labels rotated to prevent overlap,
+          good spacing between all elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Matrix cells are well-sized and clearly visible, rounded corners
+          add polish, white cell borders provide separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging blue-white-red colormap is colorblind-friendly (blue vs
+          red distinguishable), text color adapts for contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of square canvas for symmetric matrix, but some excess white
+          space at top between title and matrix
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes labeled with "Business Metrics" title, descriptive variable
+          names on both axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid needed for heatmap (appropriate), but colorbar placement
+          could be closer to the matrix
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct correlation matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Symmetric matrix with variables on both axes correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap centered at zero ✓, annotations with 2 decimals
+          ✓, symmetric matrix ✓, colorbar with -1 to 1 range ✓
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Colorbar fixed at -1 to +1, all values within range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar clearly labeled with "Correlation" and scale values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "heatmap-correlation · pygal · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows positive correlations (0.82, 0.85), negative correlations (-0.22,
+          -0.15), weak correlations (0.12, -0.08), perfect diagonal (1.00)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business and financial metrics are a realistic scenario where correlation
+          analysis is commonly used
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All correlation values are realistic and within valid range [-1,
+          1]
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses custom class CorrelationHeatmap extending Graph, which is necessary
+          for pygal but violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set (though not strictly needed since data
+          is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal components, sys for path handling)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (via render_to_png), also outputs plot.svg and
+          plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creates custom Graph subclass to extend pygal capabilities, uses
+          Style for theming, generates SVG+HTML+PNG outputs. However, pygal does not
+          natively support heatmaps, so this is essentially a custom implementation
+          within pygal's rendering framework rather than using built-in chart types.
+  verdict: APPROVED
diff --git a/plots/heatmap-correlation/metadata/seaborn.yaml b/plots/heatmap-correlation/metadata/seaborn.yaml
index 7a4b3a2fac..4f9187652a 100644
--- a/plots/heatmap-correlation/metadata/seaborn.yaml
+++ b/plots/heatmap-correlation/metadata/seaborn.yaml
@@ -23,3 +23,175 @@ review:
   weaknesses:
   - 'Minor: Colorbar label is set twice (via cbar_kws and separately on cbar.ax),
     which is redundant but not harmful'
+  image_description: The plot displays a lower-triangular correlation matrix heatmap
+    for 9 real estate variables (Price, Area, Bedrooms, Bathrooms, Age, Garage Spots,
+    Lot size, Distance, Crime Index). The heatmap uses the RdBu_r diverging colormap
+    with dark red indicating strong positive correlations (e.g., 0.87 between Price
+    and Area), blue indicating negative correlations (e.g., -0.37 between Price and
+    Age), and white/light colors near zero. Each cell displays its correlation value
+    to 2 decimal places in white text. The upper triangle is masked to reduce redundancy.
+    The colorbar on the right shows the range from -1.0 to 1.0 with "Correlation Coefficient"
+    label. Variable names appear on both axes with rotated x-axis labels. The title
+    follows the required format.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, tick labels at 14pt, annotations at 14pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Cells are square and well-sized, linewidths separate cells cleanly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r diverging colormap is colorblind-friendly with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square aspect ratio perfect for correlation matrix, good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Variable names include units where applicable (sq ft, years, mi,
+          $K)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid (appropriate for heatmap), but colorbar label is duplicated
+          (set twice)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct correlation matrix heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables on both axes, correlation values in cells
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diverging colormap centered at 0, annotations with 2 decimals, triangle
+          mask, fixed -1 to 1 range
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Colorbar shows full -1 to 1 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "heatmap-correlation · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range of correlations: strong positive (0.87), moderate
+          (0.53), weak (0.08), negative (-0.37), near-zero (0.02)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real estate features with plausible relationships (area/bedrooms
+          correlated, age/price negatively correlated)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 9 variables is within recommended 5-15 range, correlation values
+          are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of sns.heatmap with mask, annot, fmt, center, cbar_kws,
+          linewidths
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/bokeh.yaml b/plots/hexbin-basic/metadata/bokeh.yaml
index 187be85db1..729efbd568 100644
--- a/plots/hexbin-basic/metadata/bokeh.yaml
+++ b/plots/hexbin-basic/metadata/bokeh.yaml
@@ -12,6 +12,15 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/hexbin-basic/
 preview_html: https://storage.googleapis.com/pyplots-images/plots/hexbin-basic/bokeh/plot.html
 quality_score: 95
 review:
-  strengths: []
+  strengths:
+  - Excellent manual hexbin implementation using patches for full control
+  - Three distinct density clusters are clearly visible at centers (-2,-2), (2,2),
+    and (0,3)
+  - Beautiful color gradient showing density variation from low (dark purple) to high
+    (yellow)
+  - Good hexagon spacing with white borders for visual clarity
+  - Comprehensive font sizing appropriate for high-resolution output
+  - Both PNG and HTML output (useful for interactive viewing)
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/highcharts.yaml b/plots/hexbin-basic/metadata/highcharts.yaml
index d3fe7960a6..6c7424cfc2 100644
--- a/plots/hexbin-basic/metadata/highcharts.yaml
+++ b/plots/hexbin-basic/metadata/highcharts.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/letsplot.yaml b/plots/hexbin-basic/metadata/letsplot.yaml
index 5895695e6b..92ca4f6ab8 100644
--- a/plots/hexbin-basic/metadata/letsplot.yaml
+++ b/plots/hexbin-basic/metadata/letsplot.yaml
@@ -26,3 +26,180 @@ review:
     (tooltips, hover info)
   - Some sparse hexagons at the edges are very small, making density harder to read
     in low-count areas
+  image_description: 'The plot displays a hexagonal binning visualization showing
+    density of 2D point data. Three distinct density clusters are visible: one in
+    the upper-right quadrant (around x=3, y=3) with moderate density (cyan/teal colors),
+    one in the lower-left area (around x=-2, y=1) with moderate density, and one highly
+    concentrated cluster in the lower-center (around x=0, y=-3) showing the highest
+    density with yellow/bright green colors (count ~120). The background uses a light
+    gray color (#FAFAFA), with subtle dashed gray grid lines. The viridis colormap
+    ranges from dark purple (low count ~20) through cyan/teal to bright yellow (highest
+    count ~120). The color bar on the right shows the "Count" legend. Title is correctly
+    formatted as "hexbin-basic · letsplot · pyplots.ai" at the top. Axis labels are
+    "X Coordinate" and "Y Coordinate". The 16:9 aspect ratio is properly implemented.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend text are all clearly readable
+          at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Hexagons are well-sized and clearly visible; density patterns are
+          easy to discern; minor deduction as some sparse edge hexagons are quite
+          small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions overall; slight excess whitespace on right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Coordinate" and "Y Coordinate" but no units
+          (acceptable for generic coordinate data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and appropriate alpha; legend is
+          well-placed on the right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct hexagonal binning plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y coordinates correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: color bar for density scale, viridis
+          colormap, hexagonal bins with visible density patterns'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled as "Count" representing bin density
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "hexbin-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows multiple density clusters with varying intensities; demonstrates
+          the key hexbin feature of revealing density patterns; could show more extreme
+          density variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Clustered bivariate data is plausible for density visualization;
+          scenario is generic but appropriate for demonstrating hexbin capabilities
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Coordinate values and point counts are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Minor issue: imports element_line, element_rect which could be consolidated,
+          but all imports are used'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot's ggplot2-style grammar with geom_hex, scale_fill_viridis,
+          and theme customization; could leverage more lets-plot specific features
+          like tooltips or interactivity in the HTML output
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/matplotlib.yaml b/plots/hexbin-basic/metadata/matplotlib.yaml
index f1db59ca85..303a31cabe 100644
--- a/plots/hexbin-basic/metadata/matplotlib.yaml
+++ b/plots/hexbin-basic/metadata/matplotlib.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/plotly.yaml b/plots/hexbin-basic/metadata/plotly.yaml
index 21d984e1ea..628f543d4d 100644
--- a/plots/hexbin-basic/metadata/plotly.yaml
+++ b/plots/hexbin-basic/metadata/plotly.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/plotnine.yaml b/plots/hexbin-basic/metadata/plotnine.yaml
index d08ad05e8e..38299fee07 100644
--- a/plots/hexbin-basic/metadata/plotnine.yaml
+++ b/plots/hexbin-basic/metadata/plotnine.yaml
@@ -12,6 +12,13 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/hexbin-basic/
 preview_html: null
 quality_score: 98
 review:
-  strengths: []
+  strengths:
+  - Excellent visual output showing three distinct density clusters
+  - Proper use of viridis colormap as specified
+  - Clean hexagonal tessellation with visible boundaries
+  - All spec requirements met (color bar, appropriate gridsize, clustered data)
+  - The custom hexbin implementation is mathematically correct and produces professional
+    results
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/pygal.yaml b/plots/hexbin-basic/metadata/pygal.yaml
index 849dc91d57..d445988c63 100644
--- a/plots/hexbin-basic/metadata/pygal.yaml
+++ b/plots/hexbin-basic/metadata/pygal.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/hexbin-basic/metadata/seaborn.yaml b/plots/hexbin-basic/metadata/seaborn.yaml
index 8f246c0a17..d11a8ee1fc 100644
--- a/plots/hexbin-basic/metadata/seaborn.yaml
+++ b/plots/hexbin-basic/metadata/seaborn.yaml
@@ -25,3 +25,168 @@ review:
   - Marginal distributions from JointGrid not visible in final output, reducing seaborn-distinctive
     value
   - Consider log scale for colormap when density varies widely as spec suggests
+  image_description: 'The plot displays a hexagonal binning visualization with three
+    distinct density clusters on a white background. The hexagons are colored using
+    the viridis colormap (dark purple to yellow), with the brightest yellow hexagons
+    indicating the highest point density (~70 counts). The X-axis is labeled "X Coordinate"
+    (range -2 to 8), and the Y-axis is labeled "Y Coordinate" (range -2 to 8). A vertical
+    colorbar on the right shows "Point Count" ranging from ~5 to 70. The title "hexbin-basic
+    · seaborn · pyplots.ai" is displayed at the top. Three cluster centers are visible:
+    one around (2, 2), one around (6, 6), and one around (4-5, 5-6). The layout is
+    clean with subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and colorbar text are all clearly
+          readable at appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Hexagon bins are appropriately sized with gridsize=35, clearly showing
+          density patterns
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, though marginal distributions from JointGrid are
+          not visible in output
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Coordinate", "Y Coordinate") but lack
+          units for the GPS context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; colorbar well placed but no legend needed
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct hexagonal binning chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly assigned to axes
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colorbar for density scale and uses viridis colormap; sets mincnt=1
+          but does not use the log color scale the spec suggests for wide density variation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar label "Point Count" is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "hexbin-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple density clusters with varying intensities; demonstrates
+          hexbin advantage over scatter for large datasets
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: GPS coordinates of urban traffic hotspots are a realistic scenario,
+          though axis labels don't reflect this context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 50,000 points is appropriate for demonstrating hexbin utility
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: pandas is imported but could be avoided; matplotlib.pyplot not explicitly
+          imported though used via seaborn
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/altair.yaml b/plots/histogram-2d/metadata/altair.yaml
index 36b8b0ce72..46766a02b9 100644
--- a/plots/histogram-2d/metadata/altair.yaml
+++ b/plots/histogram-2d/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
   - Could enhance with Altair interactivity features (tooltips showing bin range and
     count)
   - Minor layout imbalance with slightly more whitespace on left side
+  image_description: The plot displays a 2D histogram heatmap showing a bivariate
+    normal distribution with positive correlation. The visualization uses rectangular
+    bins colored with the viridis colormap (dark purple for low counts, through blue/teal,
+    to bright yellow for high counts ~20+). The X axis ranges from approximately -4.0
+    to 3.2 and is labeled "X Value". The Y axis ranges from approximately -3.4 to
+    3.6 and is labeled "Y Value". A vertical colorbar on the right shows the "Count"
+    scale from ~1 to ~22. The title reads "histogram-2d · altair · pyplots.ai" at
+    the top center. The density pattern clearly shows the positive correlation (0.7)
+    with the highest concentration (yellow bins) near the origin, gradually decreasing
+    outward in an elliptical pattern along the diagonal.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bins are appropriately sized with 40 bins per axis, density patterns
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout overall but some wasted space on left edge; plot fills
+          ~60% of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Value", "Y Value") but without units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Colorbar well placed, but no grid visible (acceptable for heatmaps,
+          minor deduction)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap using mark_rect with binning
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly mapped to quantitative axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, viridis colormap as recommended, appropriate bin
+          count
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled as "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · altair · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows correlation pattern well; density variation visible; could
+          benefit from more distinct clusters or edge cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Bivariate normal is appropriate for demonstrating 2D histograms;
+          generic but plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standard normal scale values are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses declarative encoding with bin aggregation and count(); could
+          add interactivity with .interactive() or tooltips
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/bokeh.yaml b/plots/histogram-2d/metadata/bokeh.yaml
index 0a45c61c09..e459cdbb92 100644
--- a/plots/histogram-2d/metadata/bokeh.yaml
+++ b/plots/histogram-2d/metadata/bokeh.yaml
@@ -23,3 +23,164 @@ review:
   - Axis labels are generic (X Value, Y Value) instead of contextual labels with units
   - Could add HoverTool to show bin counts on mouseover for the HTML version
   - Data scenario could be more realistic (e.g., financial returns, physics measurements)
+  image_description: The plot displays a 2D histogram heatmap showing the joint distribution
+    of two continuous variables. The visualization uses the Viridis colormap (dark
+    purple for low density transitioning through blue and green to bright yellow for
+    high density). The data clearly shows a positive correlation pattern (diagonal
+    band from lower-left to upper-right), consistent with the bivariate normal distribution
+    with 0.7 correlation specified in the code. The highest density region (yellow,
+    ~45 counts) is centered around (0.5, 0.5). A colorbar labeled "Count" on the right
+    indicates density values ranging from 0 to ~45. The title "histogram-2d · bokeh
+    · pyplots.ai" is displayed at the top-left. X-axis is labeled "X Value" (-3.5
+    to 3.5), Y-axis is labeled "Y Value" (-3 to 4). Grid lines are appropriately disabled
+    for the heatmap. The plot fills the canvas well with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable but could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap bins are clearly visible with good color contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight asymmetry with colorbar
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units (generic X/Y Value)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid correctly disabled, colorbar present but title could be more
+          descriptive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, perceptually uniform colormap used
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar shows correct count range
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: histogram-2d · bokeh · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlation, density variation, but no extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Bivariate normal is plausible but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for normalized data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/highcharts.yaml b/plots/histogram-2d/metadata/highcharts.yaml
index e628f5e639..56f45034e9 100644
--- a/plots/histogram-2d/metadata/highcharts.yaml
+++ b/plots/histogram-2d/metadata/highcharts.yaml
@@ -24,3 +24,178 @@ review:
   - No grid lines to help read values at specific positions
   - Colorbar could have more descriptive title (e.g., Frequency Count instead of just
     Count)
+  image_description: The plot displays a 2D histogram heatmap showing the joint distribution
+    of correlated financial asset returns. The heatmap uses the viridis colormap (dark
+    purple for low counts, transitioning through blue/teal to bright yellow/green
+    for high counts ~80). The density pattern clearly shows a positive correlation
+    between Asset A Return (%) on the x-axis (ranging from approximately -3.2 to 9.9)
+    and Asset B Return (%) on the y-axis (ranging from approximately -0.4 to 15.5).
+    The highest density (yellow/green cells) is centered around (5%, 8%), matching
+    the specified mean values. A vertical colorbar on the right side labeled "Count"
+    shows the scale from 0 to 80. The title "histogram-2d · highcharts · pyplots.ai"
+    is displayed at the top center.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and tick labels are all readable. Font sizes
+          are appropriately scaled for 4800x2700. Minor: some tick labels are sparse
+          (every 5th bin).'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap bins are clearly visible with good color differentiation,
+          density patterns are easily discernible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, plot fills majority of area. Minor: slightly
+          more margin on top than needed'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Asset A Return (%)" and "Asset B
+          Return (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No visible grid lines, colorbar is functional but legend title "Count"
+          could include "Frequency" for clarity
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap with rectangular bins
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly mapped to the two continuous variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes colorbar showing density scale as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, axes appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents count values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear density pattern and correlation. Could benefit from slightly
+          more extreme outliers to demonstrate the tails better.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial returns scenario is realistic and commonly analyzed
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Return percentages are realistic (-3% to 10% for Asset A, -0.4% to
+          15.5% for Asset B). Slightly wide range for Asset B but plausible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts and numpy APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves as plot.png ✓ (Note: also saves plot.html for interactive
+          version)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap module with custom colorAxis stops. Could
+          leverage more Highcharts-specific features like data labels on hover or
+          zoom capabilities in HTML version.
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/letsplot.yaml b/plots/histogram-2d/metadata/letsplot.yaml
index f8a808d8b3..e88dc82a46 100644
--- a/plots/histogram-2d/metadata/letsplot.yaml
+++ b/plots/histogram-2d/metadata/letsplot.yaml
@@ -23,3 +23,173 @@ review:
     tooltips or hover info)
   - Grid/legend styling could be refined - legend appears somewhat distant from main
     plot area
+  image_description: The plot displays a 2D histogram heatmap showing the joint distribution
+    of Customer Age (years) on the x-axis and Annual Spending ($k) on the y-axis.
+    The visualization uses a viridis colormap with colors ranging from dark purple
+    (low counts) through blue and teal to bright yellow/green (high counts around
+    20). The data shows a clear elliptical density pattern centered approximately
+    at age 42 and spending $55k, demonstrating positive correlation between age and
+    spending. The colorbar on the right shows "Count" ranging from about 5 to 20.
+    The title "histogram-2d · letsplot · pyplots.ai" appears at the top. The plot
+    uses a minimal theme with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is bold and readable at 24pt, axis labels at 20pt, tick labels
+          at 16pt. Slightly smaller than optimal for 4800px output but still clear.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bin sizes are well-chosen (25x25), density variations are clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis colormap which is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, plot fills most of the area. Minor: legend
+          could be positioned slightly better'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Customer Age (years)"
+          and "Annual Spending ($k)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha, but legend positioning appears a bit far
+          from the plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram/heatmap with rectangular bins
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly assigned to continuous variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar showing count scale, uses viridis colormap as recommended
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows bivariate distribution with correlation, density variation
+          from sparse edges to dense center
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer age vs annual spending is a plausible market research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Age range 18-75 and spending $5-120k are realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot, os, shutil)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic geom_bin2d which is standard ggplot2 grammar. Does not
+          leverage lets-plot specific interactive features or advanced capabilities
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/matplotlib.yaml b/plots/histogram-2d/metadata/matplotlib.yaml
index d864c4a8d4..887af48fab 100644
--- a/plots/histogram-2d/metadata/matplotlib.yaml
+++ b/plots/histogram-2d/metadata/matplotlib.yaml
@@ -24,3 +24,170 @@ review:
   - Missing grid (though acceptable for heatmaps, a subtle grid could help read values)
   - Could showcase more matplotlib-specific features like hexbin alternative or marginal
     histograms
+  image_description: The plot displays a 2D histogram heatmap showing a bivariate
+    normal distribution with positive correlation. The viridis colormap ranges from
+    dark purple (low counts ~5) through teal/green to bright yellow (high counts ~45).
+    The data forms a clear elliptical pattern oriented diagonally from bottom-left
+    to top-right, centered around (0,0), demonstrating the 0.7 correlation coefficient.
+    A vertical colorbar on the right indicates "Count" with well-sized tick labels.
+    The title follows the correct format "histogram-2d · matplotlib · pyplots.ai".
+    Axis labels show "X Value" and "Y Value". Empty bins appear as white/transparent,
+    making the density pattern clearly visible.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: 40 bins provides excellent resolution for 5000 points, density patterns
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, colorbar properly positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("X Value", "Y Value") but lack units or context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid present (acceptable for heatmap), colorbar well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly assigned to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, viridis colormap used, empty bins hidden (cmin=1)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows correlation and density clustering well, but doesn't demonstrate
+          multimodal or asymmetric distributions
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Bivariate normal is a classic statistical example, but generic labels
+          reduce real-world context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standard normal values (-3 to 3) are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic hist2d; could leverage matplotlib's hexbin, marginal histograms,
+          or norm parameter for log scaling
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/plotly.yaml b/plots/histogram-2d/metadata/plotly.yaml
index cb095956b3..8ea49ccb33 100644
--- a/plots/histogram-2d/metadata/plotly.yaml
+++ b/plots/histogram-2d/metadata/plotly.yaml
@@ -25,3 +25,176 @@ review:
   - Colorbar positioning creates slight visual imbalance with the marginal histogram
     on the right
   - No grid lines on marginal histograms
+  image_description: The plot displays a 2D histogram heatmap showing the joint distribution
+    of two correlated stock returns (Stock A vs Stock B). The main visualization uses
+    the Viridis colormap (dark purple for low counts transitioning through blue, green
+    to yellow for high counts ~70). A clear positive correlation pattern is visible
+    as an elongated diagonal density from lower-left to upper-right centered near
+    (0, 0). Marginal 1D histograms in blue (#306998) appear at the top (X distribution)
+    and right side (Y distribution), showing bell-curve shaped distributions. The
+    title "histogram-2d · plotly · pyplots.ai" is centered at the top. Axis labels
+    show "Stock A Daily Return (%)" and "Stock B Daily Return (%)" with values ranging
+    from approximately -3 to 3. A colorbar labeled "Count" appears on the right side.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 22pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: 40x40 bins provide excellent resolution for 5000 points, density
+          pattern clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of subplots with marginal histograms, but colorbar positioning
+          creates slight imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Stock A Daily Return (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid visible on the main 2D histogram (though this is acceptable
+          for heatmaps, the marginal histograms also lack subtle grid lines)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y continuous values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, Viridis colormap used, marginal histograms included
+          (optional feature implemented)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlation structure, density variation, demonstrates where
+          scatter plots become unreadable (5000 points)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial returns scenario is real-world applicable, matches spec
+          applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Daily returns in ±3% range with 0.7 correlation is realistic for
+          correlated stocks
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, plotly.graph_objects, and make_subplots used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of make_subplots with shared axes, Histogram2d trace,
+          and interactive HTML export
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/plotnine.yaml b/plots/histogram-2d/metadata/plotnine.yaml
index 2e5fcf97ae..cb4b6dbc10 100644
--- a/plots/histogram-2d/metadata/plotnine.yaml
+++ b/plots/histogram-2d/metadata/plotnine.yaml
@@ -23,3 +23,176 @@ review:
   - Axis labels are generic (X Value, Y Value) rather than context-specific; could
     use a realistic scenario like Asset Returns vs Index Returns
   - Grid lines could be more subtle (lower alpha) for a cleaner appearance
+  image_description: The plot displays a 2D histogram heatmap showing a bivariate
+    normal distribution with positive correlation. The visualization uses rectangular
+    bins colored with the viridis colormap, ranging from dark purple (low counts ~10)
+    through teal/cyan to bright yellow (high counts ~40+). The highest density appears
+    in the center around (0, 0) with an elongated elliptical pattern extending from
+    lower-left to upper-right, clearly showing the correlation structure. The title
+    reads "histogram-2d · plotnine · pyplots.ai" at the top. X-axis is labeled "X
+    Value" ranging from -4 to ~3, Y-axis is labeled "Y Value" ranging from -2 to 4.
+    A colorbar legend on the right shows "Count" with scale from ~10 to 40. The background
+    uses a minimal theme with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes (24pt title, 20pt axis labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bins are well-sized (40 bins), density pattern clearly visible with
+          good color gradient
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but some empty space on left side of plot; plot
+          fills reasonable canvas area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Value" and "Y Value" but no units (though units
+          aren't really applicable for this abstract data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well-placed; however grid could be slightly
+          more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap with rectangular bins
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly mapped to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes colorbar showing density scale, uses viridis colormap as
+          recommended
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete distribution
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled as "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · plotnine · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows correlation pattern and density variation well, but could benefit
+          from showing different density regions more distinctly
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Bivariate normal distribution is a classic statistical example; plausible
+          but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standard normal distribution scale (-4 to 4) is appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics (ggplot + geom_bin2d + scale_fill_continuous),
+          but doesn't leverage advanced features like faceting or statistical transformations
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/pygal.yaml b/plots/histogram-2d/metadata/pygal.yaml
index 9f113d996f..928e512fca 100644
--- a/plots/histogram-2d/metadata/pygal.yaml
+++ b/plots/histogram-2d/metadata/pygal.yaml
@@ -26,3 +26,181 @@ review:
   - Heavy reliance on manual SVG construction rather than pygal native chart capabilities
   - No grid lines on the heatmap (could help with reading exact positions)
   - sys.path manipulation in code is non-standard
+  image_description: 'The plot displays a 2D histogram heatmap showing the joint distribution
+    of Customer Age (years) on the x-axis (18-75) and Annual Purchases (count) on
+    the y-axis (0-30). The main heatmap uses the viridis colormap (purple-blue-green-yellow),
+    with bright yellow/green areas showing high density around ages 30-40 with 10-15
+    purchases. A second smaller density cluster is visible around ages 50-60 with
+    fewer purchases. The plot includes marginal 1D histograms: a blue bar chart on
+    top showing the age distribution, and a horizontal bar chart on the right showing
+    purchase frequency distribution. A colorbar on the right shows the count scale
+    from 0 to 73. The title follows the correct format. All text is readable and properly
+    sized.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title, axis labels, tick labels, colorbar
+          labels all properly sized'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells well-sized, density patterns clearly visible, marginal
+          histograms proportionate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout with marginals, but square format uses space less efficiently
+          than landscape would
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Customer Age (years)", "Annual Purchases
+          (count)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid lines on heatmap (acceptable for this plot type), but no
+          legend needed anyway; minor deduction for no subtle grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap with rectangular bins
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=age, Y=purchases, color=count - all correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, viridis colormap used, marginal histograms included
+          (bonus feature from spec notes)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range (18-75, 0-30)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled correctly as "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "histogram-2d · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows two density clusters demonstrating correlation, good variation
+          across bins. Minor deduction: could show more extreme density variation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer age vs purchase frequency is a realistic market research
+          scenario mentioned in spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Ages 18-75 and 0-30 purchases are plausible; 7000 total points appropriate
+          for density visualization
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → histogram computation → SVG construction
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: sys.path manipulation is unusual, though has explanation (noqa comments
+          present)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Outputs plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal Style and creates SVG output, but primarily uses manual
+          SVG construction rather than pygal's native chart types. The implementation
+          acknowledges pygal's SVG nature but works around its limitations for this
+          complex visualization.
+  verdict: APPROVED
diff --git a/plots/histogram-2d/metadata/seaborn.yaml b/plots/histogram-2d/metadata/seaborn.yaml
index dacaecbb55..a5125b9073 100644
--- a/plots/histogram-2d/metadata/seaborn.yaml
+++ b/plots/histogram-2d/metadata/seaborn.yaml
@@ -24,3 +24,174 @@ review:
   - Axis labels are generic (X Value, Y Value) without units or more descriptive names
   - Layout could be slightly improved - the right marginal histogram appears somewhat
     cramped relative to the main plot
+  image_description: The plot displays a 2D histogram heatmap showing a bivariate
+    normal distribution with positive correlation. The main plot uses the viridis
+    colormap (dark purple for low counts through green to yellow for high counts ~50).
+    The central region shows the highest density (yellow/green) with density decreasing
+    outward (purple). Marginal 1D histograms appear on the top (X distribution) and
+    right (Y distribution) edges in a muted blue color (#306998). The title "histogram-2d
+    · seaborn · pyplots.ai" is clearly visible at the top. Axis labels show "X Value"
+    and "Y Value" with tick marks from approximately -4 to 4 on both axes. A colorbar
+    on the right indicates count values from 0 to ~50.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bins are well-sized at 40 bins, density clearly visible with viridis
+          colormap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas with JointGrid, though the right marginal histogram
+          appears slightly cramped
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels "X Value" and "Y Value" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Colorbar present and well-labeled, but no grid on main plot (acceptable
+          for heatmaps)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D histogram heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to continuous variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, viridis colormap, marginal histograms included
+          (optional but nice)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Count"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-2d · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows correlation pattern well, but could demonstrate more varied
+          density patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Bivariate normal distribution is a classic statistical example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 5000 points is appropriate; values are sensible for standardized
+          data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and seaborn imported (matplotlib.pyplot not imported but
+          not needed with JointGrid)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's JointGrid and histplot effectively, but could leverage
+          sns.jointplot for more concise code
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/altair.yaml b/plots/histogram-basic/metadata/altair.yaml
index bac77fa34e..6b2a33c868 100644
--- a/plots/histogram-basic/metadata/altair.yaml
+++ b/plots/histogram-basic/metadata/altair.yaml
@@ -22,3 +22,172 @@ review:
   weaknesses:
   - No grid lines to aid in reading frequency values
   - Could add bar edge/stroke color for better visual definition between bins
+  image_description: The plot displays a histogram showing the distribution of human
+    heights in centimeters. The chart uses a solid blue color (#306998) for the bars.
+    The x-axis is labeled "Height (cm)" ranging from approximately 135 to 210 cm,
+    and the y-axis is labeled "Frequency" ranging from 0 to 110. The distribution
+    follows a clear bell-curve shape centered around 170 cm, with the peak frequency
+    of about 102 observations. The title "histogram-basic · altair · pyplots.ai" appears
+    at the top. The bars are adjacent with no gaps between them, creating a proper
+    histogram appearance. The layout is clean with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible; slight deduction as bars
+          could have subtle edge lines for better definition
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions overall, slight excess whitespace on the right side
+          due to x-axis extending to 210
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Height (cm)" with units, Y-axis has "Frequency"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid lines present (would improve readability), no legend needed
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Single continuous variable correctly binned on x-axis, frequency
+          on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges, readable labels, Y-axis starts at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series histogram
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "histogram-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows unimodal normal distribution clearly; could demonstrate more
+          features like slight skewness or outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Human heights in cm is a perfect, relatable real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 170cm mean with 10cm std dev is realistic for human heights
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses declarative encoding and tooltips, but could leverage more Altair
+          features like interactivity
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/bokeh.yaml b/plots/histogram-basic/metadata/bokeh.yaml
index 54d51cc4b5..b8e137763a 100644
--- a/plots/histogram-basic/metadata/bokeh.yaml
+++ b/plots/histogram-basic/metadata/bokeh.yaml
@@ -25,3 +25,171 @@ review:
     interactivity
   - Data shows only a perfect normal distribution; adding slight skewness or a few
     outliers would better demonstrate histogram capabilities
+  image_description: The plot displays a histogram showing the distribution of human
+    heights in centimeters. The bars are rendered in a muted blue color (#306998)
+    with white borders and 0.8 alpha transparency. The distribution follows a clear
+    bell curve (normal distribution) centered around 170 cm, ranging from approximately
+    135 cm to 210 cm. The peak frequency is around 58 observations. The title "histogram-basic
+    · bokeh · pyplots.ai" appears in the top-left corner. The x-axis is labeled "Height
+    (cm)" and the y-axis "Frequency". Dashed grid lines are visible at alpha 0.3.
+    The Y-axis correctly starts at 0. The bars have no gaps between them, showing
+    proper histogram bin edges.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, though tick labels appear slightly
+          small relative to the large canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with good sizing and appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Height (cm)" and "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend needed for single-series
+          histogram
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly binned and displayed as frequency
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges, no gaps, readable labels, Y-axis at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series histogram
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution well, but could benefit from showing slight
+          skewness or outliers for richer demonstration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Human heights in cm is an excellent, relatable real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Heights 135-210 cm with mean 170 cm and SD 10 cm are perfectly realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and quad glyphs correctly, but doesn't leverage
+          Bokeh's distinctive interactive features like hover tooltips which would
+          enhance the histogram
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/highcharts.yaml b/plots/histogram-basic/metadata/highcharts.yaml
index 376bbbe8f1..b1f59d433e 100644
--- a/plots/histogram-basic/metadata/highcharts.yaml
+++ b/plots/histogram-basic/metadata/highcharts.yaml
@@ -26,3 +26,172 @@ review:
   - Data is purely generic normal distribution without a specific real-world context
     in the visualization
   - Could leverage Highcharts tooltips or data labels for enhanced interactivity
+  image_description: The plot displays a histogram showing a normal distribution centered
+    around 65. The chart uses a solid blue color (#306998) for the bars with a darker
+    blue border. The title "histogram-basic · highcharts · pyplots.ai" appears at
+    the top in large bold text. The Y-axis is labeled "Frequency" and ranges from
+    0 to 102. The X-axis shows bin ranges (e.g., 16-23, 23-31, ..., 116-123) with
+    labels rotated at approximately 315 degrees. The bars have no gaps between them,
+    correctly representing histogram bins. The distribution peaks at the 66-73 bin
+    with a frequency of ~96, showing a classic bell curve shape with 500 data points.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and clear (72px), axis labels are readable (48px titles,
+          32-36px tick labels), though Y-axis tick labels are slightly dense
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, X-axis labels rotated to avoid collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with appropriate sizing for the data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good proportions, adequate margins (marginBottom: 250 for rotated
+          labels)'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Frequency" label, X-axis has "Value Range" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (alpha via #e0e0e0), legend correctly hidden for
+          single series'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct histogram using column chart with no gaps (pointPadding:
+          0, groupPadding: 0)'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly binned and frequencies displayed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges, readable axis labels, Y-axis starts at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows unimodal normal distribution well, but could show more interesting
+          distribution features (e.g., slight skew or outliers)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic normal data (loc=65, scale=15) is plausible for many contexts
+          (e.g., test scores), but lacks explicit context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic (centered at 65 with std of 15)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts column series with proper styling, but doesn't leverage
+          more advanced Highcharts features like tooltips or data labels
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/letsplot.yaml b/plots/histogram-basic/metadata/letsplot.yaml
index d817b252c0..e00da408fe 100644
--- a/plots/histogram-basic/metadata/letsplot.yaml
+++ b/plots/histogram-basic/metadata/letsplot.yaml
@@ -24,3 +24,167 @@ review:
     frequency reading
   - Does not leverage lets-plot distinctive features like interactive tooltips or
     custom scales
+  image_description: 'The plot shows a histogram of height data in centimeters (x-axis:
+    140-205 cm, y-axis: Frequency 0-60). The bars are filled with a blue color (#306998)
+    with white borders. The distribution shows a slight bimodality with peaks around
+    165 cm and 178 cm, representing combined male and female height distributions.
+    The title "histogram-basic · letsplot · pyplots.ai" appears at the top left. The
+    plot uses a minimal theme with subtle gridlines on a white background. All text
+    is clearly readable, and the layout is well-balanced with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at proper
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars are well-sized with appropriate bin count (30 bins)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper whitespace, no cut-off elements
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "Height (cm)", Y-axis shows "Frequency"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines despite specification noting clear bin edges
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable correctly mapped to x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges with white borders, readable axes, Y starts at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-variable histogram
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "histogram-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution demonstrating histogram capabilities,
+          though subtle
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Human height data is a realistic, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Heights 140-205 cm are realistic human measurements
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic ggplot grammar used, but no lets-plot specific features like
+          interactive tooltips or distinctive scales
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/matplotlib.yaml b/plots/histogram-basic/metadata/matplotlib.yaml
index 6df2d6f335..ae8268ca65 100644
--- a/plots/histogram-basic/metadata/matplotlib.yaml
+++ b/plots/histogram-basic/metadata/matplotlib.yaml
@@ -24,3 +24,173 @@ review:
     variations
   - Data distribution is purely normal; could show slight asymmetry or a few outliers
     for richer feature demonstration
+  image_description: The plot displays a histogram of exam scores using 20 bins. Bars
+    are rendered in a muted blue color (#306998) with white edge lines (1.5px) separating
+    each bin clearly. The distribution is approximately normal, centered around 72-75
+    points, with frequencies ranging from 0 to ~60 students per bin. The x-axis ranges
+    from ~30 to 100 (exam scores in points), and the y-axis shows frequency counts
+    starting at 0. The title "histogram-basic · matplotlib · pyplots.ai" appears at
+    the top. Axis labels are descriptive with units ("Exam Score (points)" and "Frequency
+    (count)"). A subtle dashed y-axis grid (alpha 0.3) aids readability. The layout
+    is clean with good proportions and no clutter.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized, white edges clearly separate bins, alpha 0.85 provides
+          good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, tight_layout applied, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units in parentheses
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid only on y-axis is fine for histograms, but no legend needed
+          here (N/A - deducting 0)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Single continuous variable correctly binned on x-axis, frequency
+          on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges with no gaps (edgecolor="white"), readable labels,
+          appropriate bin count (20), y-axis starts at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, x-axis spans full score range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series histogram (N/A - full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution shape clearly, but could demonstrate more
+          features like outliers or slight skewness for richer visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Exam scores with mean=72, std=12 is a very realistic educational
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores clipped to 0-100 range, realistic exam score distribution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current ax.hist() API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses standard hist() with basic parameters; could showcase matplotlib-specific
+          features like histtype, density normalization, or statistical annotations
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/plotly.yaml b/plots/histogram-basic/metadata/plotly.yaml
index f80b4c23ec..21ef7d60a0 100644
--- a/plots/histogram-basic/metadata/plotly.yaml
+++ b/plots/histogram-basic/metadata/plotly.yaml
@@ -26,3 +26,174 @@ review:
     histogram ability to reveal distribution shape
   - Could leverage Plotly hover template feature to show bin ranges and counts on
     hover in the HTML version
+  image_description: The plot displays a histogram of exam scores with a blue color
+    (#306998). The x-axis shows "Score (points)" ranging from approximately 45 to
+    100, and the y-axis shows "Frequency (count)" ranging from 0 to 45. The title
+    correctly reads "histogram-basic · plotly · pyplots.ai" centered at the top. The
+    histogram shows a slightly right-skewed distribution with a main peak around 75
+    points and a secondary bump around 88-90, reflecting the bimodal data generation
+    (main distribution at 72, high performers at 88). Bars have white borders creating
+    clear separation. The background is clean white with subtle gray gridlines. Overall
+    layout is well-balanced with good margins.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with appropriate sizing for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) with good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper margins, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Score (points)" and "Frequency (count)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but no legend needed for single-series
+          histogram (-1 for grid being perhaps too subtle)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, frequency on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges with no gaps (bargap=0), readable labels, appropriate
+          bin count (20)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at zero (rangemode='tozero'), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series histogram, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "histogram-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution with main peak and secondary bump, good
+          spread; could show more extreme outliers (-2)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Exam scores is a perfect real-world scenario for histograms
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores clipped to 0-100 range which is realistic; 220 data points
+          is good (-1 for distribution being slightly artificial)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses go.Histogram correctly with proper Plotly layout customization;
+          also generates interactive HTML output which is a Plotly strength; however,
+          doesn't leverage hover templates or other advanced Plotly interactivity
+          features
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/plotnine.yaml b/plots/histogram-basic/metadata/plotnine.yaml
index 5863430f91..fffc7b3fc4 100644
--- a/plots/histogram-basic/metadata/plotnine.yaml
+++ b/plots/histogram-basic/metadata/plotnine.yaml
@@ -26,3 +26,172 @@ review:
   - Data extends beyond 100 which is atypical for test scores on a 0-100 scale
   - Distribution is purely symmetric normal - could show more interesting features
     like slight skewness
+  image_description: The plot displays a histogram showing the distribution of test
+    scores. The bars are filled with a blue color (#306998) with white borders, using
+    an alpha of 0.85 for slight transparency. The x-axis is labeled "Test Score" ranging
+    from approximately 30 to 115, and the y-axis is labeled "Frequency" ranging from
+    0 to about 62. The title "histogram-basic · plotnine · pyplots.ai" appears at
+    the top. The distribution shows a classic bell curve (normal distribution) centered
+    around 70, with the peak frequency around 62 occurrences. The plot uses a minimal
+    theme with a clean white background and subtle grid lines. The 16:9 aspect ratio
+    provides good horizontal space for the histogram bars.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars clearly visible with appropriate width for 25 bins
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, data centered with appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Test Score" and "Frequency" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid from theme_minimal, no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, frequency on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clear bin edges with white borders, readable labels, appropriate
+          bin count
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series histogram
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution well, but could show more interesting features
+          like slight skewness or outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores centered at 70 with std dev of 12 is a realistic exam
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values mostly realistic, though some scores exceed 100 which is unusual
+          for typical tests
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_histogram and theme_minimal, but could
+          leverage more plotnine-specific features like faceting or statistical transformations
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/pygal.yaml b/plots/histogram-basic/metadata/pygal.yaml
index b6e92cb425..07693cd98f 100644
--- a/plots/histogram-basic/metadata/pygal.yaml
+++ b/plots/histogram-basic/metadata/pygal.yaml
@@ -23,3 +23,185 @@ review:
   - Grid styling could be more subtle (currently dotted lines are visible but acceptable)
   - Does not customize pygal tooltips which is a distinctive feature
   - Histogram bars lack visible borders which would help distinguish adjacent bins
+  image_description: The plot displays a histogram of exam scores using pygal. The
+    chart has a white background with the title "histogram-basic · pygal · pyplots.ai"
+    centered at the top in dark gray text. The X-axis is labeled "Exam Score (points)"
+    ranging from approximately 25 to 100, and the Y-axis is labeled "Number of Students"
+    ranging from 0 to about 57. The histogram bars are rendered in a muted blue color
+    (#306998) with clear edges and no gaps between adjacent bins. The distribution
+    appears roughly normal/bell-shaped, centered around 72-75 points, with a peak
+    frequency of approximately 57 students. There are subtle horizontal dotted grid
+    lines for the Y-axis. The histogram shows the characteristic shape of exam score
+    data - low frequencies at the extremes (very low scores around 25-35 and high
+    scores near 100), with the bulk of students scoring in the 60-85 range.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable. Font
+          sizes are well-scaled for the 4800x2700 canvas, though tick labels could
+          be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars are clearly visible with good sizing. The bars could
+          benefit from a subtle border to better distinguish adjacent bins.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe. Good contrast against
+          white background.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned layout with appropriate margins. No cut-off content.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Exam Score (points)"
+          and "Number of Students".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid lines are subtle (dotted), and hiding the legend is appropriate
+          for a single-series histogram. However, a "Distribution" series is added
+          without any visible legend benefit - acceptable, but -2 for that.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type using pygal.Histogram().
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to histogram bins with frequency counts.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: clear bin edges, readable labels, appropriate
+          bin count (20 bins), Y-axis starts at zero.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range (25-100 for scores, 0-57 for frequency).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden (show_legend=False) which is appropriate for single-series
+          histogram.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-basic · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution shape well. Distribution is unimodal; could
+          show more variation (e.g., slight skew), though some outliers are visible
+          at the edges and the clipping at 0-100 creates interesting tail behavior.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Exam scores are a perfect real-world scenario. Mean of 72 with std
+          of 14 is realistic for exam data.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 observations is appropriate. Scores clipped to 0-100 range. Values
+          are realistic for exam scores.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported - all used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png and plot.html (both correct for pygal).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal.Histogram with proper (count, start, end) tuple format.
+          Uses custom Style for theming. Generates both SVG-based HTML and PNG. Does
+          not leverage pygal's built-in interactivity features (tooltips are default
+          but not customized).
+  verdict: APPROVED
diff --git a/plots/histogram-basic/metadata/seaborn.yaml b/plots/histogram-basic/metadata/seaborn.yaml
index f90328e15a..820cc970f6 100644
--- a/plots/histogram-basic/metadata/seaborn.yaml
+++ b/plots/histogram-basic/metadata/seaborn.yaml
@@ -23,3 +23,147 @@ review:
     from matplotlib
   - Data shows only unimodal distribution; adding slight skew or outliers would better
     demonstrate histogram capabilities
+  image_description: The plot displays a histogram of test scores with a clear bell-curve
+    (normal) distribution. The bars are rendered in a muted blue color (#306998) with
+    white edges separating each bin. The x-axis shows "Test Score (points)" ranging
+    from approximately 35 to 120, while the y-axis shows "Frequency (count)" ranging
+    from 0 to 60. The distribution is centered around 75-80 points with the highest
+    bars reaching about 59 counts. The title "histogram-basic · seaborn · pyplots.ai"
+    is clearly visible at the top. A subtle dashed grid is applied to the y-axis only.
+    The layout is well-balanced with no overlapping elements.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars well-sized with clear white edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: single accessible blue color
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: y-axis grid only, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct histogram type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: values on x-axis, frequency on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: clear bin edges, readable labels, y-axis at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-variable
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: false
+        comment: shows normal distribution but lacks outliers/multimodality
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: test scores is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic values for test scores
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean linear flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: fixed seed set
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn histplot correctly
+        score: 3
+        max: 5
+        passed: false
+        comment: no KDE or statistical features
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/altair.yaml b/plots/histogram-cumulative/metadata/altair.yaml
index 1127f6b2c8..30e1e86932 100644
--- a/plots/histogram-cumulative/metadata/altair.yaml
+++ b/plots/histogram-cumulative/metadata/altair.yaml
@@ -23,3 +23,173 @@ review:
   - Could add reference lines (e.g., median at 50% mark) to enhance readability
   - Missing explicit legend even though single series (minor)
   - Library features score lower due to not using Altair interactive selection capabilities
+  image_description: The plot displays a cumulative histogram (ogive) showing electricity
+    usage in kWh on the x-axis (ranging from ~80 to 700 kWh) and cumulative proportion
+    on the y-axis (ranging from 0.00 to 1.00). The visualization uses a step-after
+    interpolation style with a filled area chart in a muted blue color (#306998) with
+    slight transparency. The title "histogram-cumulative · altair · pyplots.ai" appears
+    at the top center. Both axes have clear, readable labels with appropriate font
+    sizes. The grid is subtle with light gray lines. The step pattern clearly shows
+    the monotonically increasing cumulative distribution, revealing a bimodal pattern
+    with steeper sections around 200-300 kWh (low usage cluster) and 400-500 kWh (medium
+    usage cluster).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step area chart with good opacity (0.7) and clear line stroke (3px)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, slight margin imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Electricity Usage (kWh)" and "Cumulative Proportion" - descriptive
+          with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate (alpha 0.3), but no legend present (though not
+          strictly needed for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram displayed as step chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = bin edges (usage values), Y = cumulative proportion
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows cumulative proportion, monotonically increasing, step function
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data from ~80 to 700 kWh, 0 to 1 proportion
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-cumulative · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution with two clusters (low and medium usage),
+          demonstrates cumulative nature well, could show more extreme edge cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly household electricity usage is a realistic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 80-700 kWh are realistic for household monthly usage, though
+          some values might be slightly high for the "low usage" category
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct scale factor
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses mark_area with step interpolation and tooltips, but could leverage
+          more Altair-specific features like selections or layered marks
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/bokeh.yaml b/plots/histogram-cumulative/metadata/bokeh.yaml
index 8eac39ef6d..5e753cc3e9 100644
--- a/plots/histogram-cumulative/metadata/bokeh.yaml
+++ b/plots/histogram-cumulative/metadata/bokeh.yaml
@@ -24,3 +24,176 @@ review:
   - Could leverage HoverTool to show percentile information on hover
   - No reference lines showing common percentiles (p50, p90, p99) which would enhance
     the cumulative histogram utility
+  image_description: The plot displays a cumulative histogram showing web service
+    response times. The visualization features a blue step function line (#306998)
+    with a light blue filled area underneath. Yellow circular markers (#FFD43B) with
+    blue borders are placed at each bin edge to mark cumulative count values. The
+    x-axis shows "Response Time (ms)" ranging from 0 to approximately 700ms, and the
+    y-axis shows "Cumulative Count" ranging from 0 to 500. The title correctly follows
+    the format "histogram-cumulative · bokeh · pyplots.ai". The background is a light
+    gray (#fafafa) with dashed grid lines at 0.3 alpha. The curve is monotonically
+    increasing as expected for a cumulative histogram, starting at 0 and reaching
+    500 (total sample size). There's a Bokeh logo/toolbar visible in the top-right
+    corner.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step line and markers are visible; markers could be slightly larger
+          for this canvas size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight empty area on right where data tapers
+          off
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)" and "Cumulative
+          Count"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Dashed grid at alpha 0.3 is good, but Bokeh toolbar/logo visible
+          in corner detracts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram with step function visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows response time bins, Y-axis shows cumulative count
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows cumulative count, monotonically increasing, step function display
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible (0-700ms, 0-500 count)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-cumulative · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cumulative nature well; could better demonstrate percentile
+          lookups with reference lines
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Web service response times is a real, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 10-700ms are realistic; mixing exponential and normal
+          distributions creates interesting shape
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and basic plotting, but doesn't leverage Bokeh's
+          interactive features (HoverTool for showing percentiles, annotations, etc.)
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/highcharts.yaml b/plots/histogram-cumulative/metadata/highcharts.yaml
index b64193bc36..d1caa1510e 100644
--- a/plots/histogram-cumulative/metadata/highcharts.yaml
+++ b/plots/histogram-cumulative/metadata/highcharts.yaml
@@ -23,3 +23,184 @@ review:
   - Legend could be positioned more optimally (e.g., inside the plot area in lower-right
     corner)
   - Output image height slightly differs from specified 2700px
+  image_description: The plot displays a cumulative histogram showing student test
+    score distribution. The chart uses a step-area visualization with a blue color
+    (#306998) and gradient fill transitioning from semi-transparent blue at the top
+    to nearly transparent at the bottom. The title "histogram-cumulative · highcharts
+    · pyplots.ai" appears at the top in bold, with a subtitle "Student Test Score
+    Distribution (n=280)" below it. The x-axis shows "Test Score" ranging from 0 to
+    100, and the y-axis shows "Cumulative Percentage (%)" from 0% to 100%. Blue circular
+    markers appear at each data point along the step function. The curve is monotonically
+    increasing as expected for a cumulative distribution, with steeper slopes in the
+    40-80 range indicating where most scores fall. Grid lines are subtle light gray.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels and tick labels are all clearly readable.
+          Font sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the chart.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (radius 10), line width is good (4px). The
+          step pattern is clearly visible. Minor deduction as markers could be slightly
+          larger.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses single blue color (#306998), colorblind-safe, no red-green issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins and spacing. Plot area is well-utilized. Minor wasted
+          space on the right side.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Test Score" and "Cumulative Percentage (%)" are descriptive with
+          units.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (light gray #e0e0e0). Legend is enabled but positioned
+          by default rather than optimally placed.'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram using step-area chart, showing monotonically
+          increasing cumulative distribution.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows bin values (score), Y-axis shows cumulative
+          percentage.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows cumulative count as percentage, step function appearance, proper
+          bin boundaries.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows 0-100 (full score range), Y-axis shows 0-100%.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Cumulative Distribution" label is accurate.'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "histogram-cumulative · highcharts · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cumulative distribution with interesting shape (multimodal
+          underlying data creates visible inflection points). The S-curve shape demonstrates
+          percentile concepts well.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Student test scores is a realistic, neutral educational scenario.
+          The n=280 sample size is reasonable.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores clipped to 0-100 are realistic. The tri-modal distribution
+          (struggling/average/high performers) is plausible but somewhat stylized.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart config → export. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts_core, selenium, etc.).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Note: Image height is 2561px instead of specified 2700px (likely
+          browser rendering difference).'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts step area chart with gradient fill, proper series
+          configuration. Could leverage more Highcharts-specific features like tooltips
+          configuration or animation settings.
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/letsplot.yaml b/plots/histogram-cumulative/metadata/letsplot.yaml
index 250593a583..684c06e5e4 100644
--- a/plots/histogram-cumulative/metadata/letsplot.yaml
+++ b/plots/histogram-cumulative/metadata/letsplot.yaml
@@ -22,3 +22,12 @@ review:
   weaknesses:
   - Could use geom_histogram with cumulative parameter if available, though geom_rect
     approach is valid
+  image_description: The plot displays a cumulative histogram of API response times
+    in milliseconds using a blue color (#306998) with darker borders (#1e4a6e). The
+    x-axis shows "Response Time (ms)" ranging from 0 to ~750ms, and the y-axis shows
+    "Cumulative Count" from 0 to 500. The histogram exhibits a characteristic cumulative
+    distribution shape - steep initial rise (many fast responses) then gradually flattening
+    (fewer slow responses). The title "histogram-cumulative · letsplot · pyplots.ai"
+    is prominently displayed at the top. The plot uses a minimal theme with subtle
+    gray gridlines and good use of canvas space.
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/matplotlib.yaml b/plots/histogram-cumulative/metadata/matplotlib.yaml
index d9715cc051..1ba1b41280 100644
--- a/plots/histogram-cumulative/metadata/matplotlib.yaml
+++ b/plots/histogram-cumulative/metadata/matplotlib.yaml
@@ -27,3 +27,180 @@ review:
     or "Cumulative Probability")
   - Could use more distinctive matplotlib features like axhspan for percentile bands
     or secondary y-axis for counts
+  image_description: The plot displays a cumulative histogram showing exam score distribution.
+    The chart uses a blue (#306998) step function with a light blue filled area underneath.
+    The x-axis shows "Exam Score" ranging from 0 to 100, and the y-axis shows "Cumulative
+    Proportion" from 0.0 to 1.0. Yellow dashed reference lines mark the 25th, 50th,
+    75th, and 90th percentiles with annotations showing the corresponding score values
+    (58, 69, 82, and 88 respectively). The title follows the correct format "histogram-cumulative
+    · matplotlib · pyplots.ai". A legend in the lower right shows "Cumulative Distribution".
+    The grid is subtle with dashed lines at alpha 0.3.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, annotations
+          at 14pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text elements are well-positioned; percentile annotations strategically
+          placed to avoid overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step histogram with linewidth=3 is clearly visible; filled area with
+          alpha=0.3 enhances visibility without obscuring
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast; colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with tight_layout(); slight deduction for
+          y-axis extending to 1.05 creating minor empty space at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Exam Score" and "Cumulative Proportion" are descriptive but lack
+          units (though proportion is unitless, score could have "points")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3 is appropriately subtle; legend well-placed in
+          lower right but could be smaller
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram with step display as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows values, Y-axis shows cumulative proportion - correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows normalized cumulative count, step function display, and percentile
+          reference lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-100 (full exam range), Y-axis 0-1.05 shows complete distribution
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Cumulative Distribution" accurately describes the plot element'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows monotonically increasing step function with percentile markers;
+          could demonstrate more variation in distribution shape
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Exam scores with realistic distribution (average ~65, high performers
+          ~85, lower performers ~45) - educational context is neutral and appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores 0-100, realistic distribution with median ~69, all values
+          plausible for exam scores
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions or
+          classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current for matplotlib 3.x
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib's hist() with cumulative and histtype parameters
+          effectively, but could leverage more matplotlib-specific features like axhspan
+          for percentile bands or twin axes
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/plotly.yaml b/plots/histogram-cumulative/metadata/plotly.yaml
index 39a1a9c783..b7103e477e 100644
--- a/plots/histogram-cumulative/metadata/plotly.yaml
+++ b/plots/histogram-cumulative/metadata/plotly.yaml
@@ -24,3 +24,177 @@ review:
   - Uses manual histogram calculation with np.histogram instead of Plotly native go.Histogram
     with cumulative_enabled=True
   - Legend position in upper-left could be moved to avoid potential visual overlap
+  image_description: The plot displays a cumulative histogram showing test score distribution.
+    The main cumulative distribution curve is rendered as a step function in Python
+    Blue (#306998) with a light blue fill beneath it (tozeroy fill). The x-axis shows
+    "Test Score (points)" ranging from 0 to 100, and the y-axis shows "Cumulative
+    Proportion" ranging from 0% to 100%. Three horizontal dashed yellow lines mark
+    the 25th, 50th, and 75th percentiles, with corresponding vertical dashed lines
+    dropping down to the x-axis. Yellow circular markers with blue outlines are placed
+    at each percentile intersection point, labeled with "25%", "50%", and "75%" text.
+    The title "histogram-cumulative · plotly · pyplots.ai" is centered at the top.
+    A legend in the upper-left corner identifies the "Cumulative Distribution" trace.
+    The background uses the plotly_white template with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step line width of 4 is clearly visible, percentile markers at size
+          14 are prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: X-axis has units "(points)" but Y-axis lacks units (proportion is
+          unitless, but could specify "of total")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle at 0.1 alpha (good), but legend could be better positioned
+          (top-left overlaps with visual area slightly)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram as step function
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows bin edges (score values), Y shows cumulative proportion
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows cumulative count/proportion, monotonically increasing, step
+          function display
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-105, Y-axis 0-105% shows all data clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Cumulative Distribution" label is accurate'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-cumulative · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multimodal distribution with low/average/high performers, demonstrates
+          percentile lookups
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Test scores are a good scenario, but mixing three normal distributions
+          is somewhat artificial
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores 0-100 is realistic for test scores
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of hover templates with custom formatting, but doesn't leverage
+          Plotly's built-in histogram with cumulative=True parameter which would be
+          more idiomatic
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/plotnine.yaml b/plots/histogram-cumulative/metadata/plotnine.yaml
index f8a0bd069c..95f6832a04 100644
--- a/plots/histogram-cumulative/metadata/plotnine.yaml
+++ b/plots/histogram-cumulative/metadata/plotnine.yaml
@@ -21,3 +21,169 @@ review:
   weaknesses:
   - Manual histogram calculation with numpy instead of exploring plotnine native stat_bin
     with cumulative transformation
+  image_description: The plot displays a cumulative histogram showing product shelf
+    life measurements in days. The visualization uses blue bars (#306998) with darker
+    borders (#1a3d5c) arranged in ascending stair-step fashion from left to right.
+    The x-axis shows "Shelf Life (days)" ranging from approximately 20 to 75 days,
+    while the y-axis displays "Cumulative Count" from 0 to 450. The title "histogram-cumulative
+    · plotnine · pyplots.ai" appears at the top. The bars form a characteristic S-curve
+    pattern typical of cumulative distributions, showing slower accumulation at the
+    tails and steeper growth in the middle range. The background uses a minimal theme
+    with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some empty space on the left side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "(days)", Y-axis is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but no legend needed for this single-series plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram as bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows bin values, Y shows cumulative count correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Monotonically increasing, shows cumulative totals
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: histogram-cumulative · plotnine · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution accumulation well, though the bimodality
+          could be more pronounced in the cumulative view
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product shelf life is a neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 45-65 day shelf life is realistic for products
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but uses verbose=False which is fine
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_bar and theme_minimal, but manual histogram
+          calculation instead of native geom_histogram with cumulative option
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/pygal.yaml b/plots/histogram-cumulative/metadata/pygal.yaml
index 2981fed097..6557593aba 100644
--- a/plots/histogram-cumulative/metadata/pygal.yaml
+++ b/plots/histogram-cumulative/metadata/pygal.yaml
@@ -24,3 +24,169 @@ review:
   - Could use pygal native Histogram() chart type instead of manual computation with
     Bar()
   - Missing tooltips configuration to enhance interactivity in HTML output
+  image_description: The plot displays a cumulative histogram showing test score distributions.
+    It uses a clean blue color (#306998 - Python blue) for all bars on a white background.
+    The title "histogram-cumulative · pygal · pyplots.ai" appears at the top. The
+    x-axis is labeled "Test Score Range" with 20 bin labels (33-36 through 96-100)
+    rotated at 45 degrees. The y-axis shows "Cumulative Proportion" ranging from 0
+    to 1. The bars show a monotonically increasing S-curve pattern typical of a cumulative
+    distribution, starting near 0 for low scores and reaching 1.0 at the highest bin.
+    Horizontal grid lines are visible at 0.1 intervals. The layout is well-balanced
+    with the chart occupying most of the canvas.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels are slightly
+          small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; 45-degree rotation handles x-axis labels well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight extra whitespace on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Test Score Range" and "Cumulative Proportion"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed (single series), but grid lines could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram representation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows score ranges, Y shows cumulative proportion correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Monotonically increasing, shows cumulative counts/proportions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range from 0 to 1 displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, appropriate for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-cumulative · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows full cumulative distribution with S-curve shape; could show
+          comparison to theoretical CDF
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores from 500 students are a plausible, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 0-100 are realistic; normal distribution with mean 72 is sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Basic Bar chart usage; could use pygal.Histogram() or tooltips for
+          interactivity
+  verdict: APPROVED
diff --git a/plots/histogram-cumulative/metadata/seaborn.yaml b/plots/histogram-cumulative/metadata/seaborn.yaml
index 3714a099b7..95e14467fe 100644
--- a/plots/histogram-cumulative/metadata/seaborn.yaml
+++ b/plots/histogram-cumulative/metadata/seaborn.yaml
@@ -28,3 +28,177 @@ review:
     with the percentile reference lines'
   - 'Minor: Right-side whitespace after data ends (~500ms) could be reduced by setting
     xlim closer to the actual data range'
+  image_description: 'The plot shows a cumulative histogram (step function) displaying
+    web API response times on a blue filled area chart. The x-axis shows "Response
+    Time (ms)" ranging from 0 to 600, and the y-axis shows "Cumulative Proportion"
+    from 0.0 to just above 1.0. The histogram uses a muted blue color (#306998) with
+    filled area and step edges. Four percentile reference lines are displayed: P50
+    at 71ms (yellow), P90 at 359ms (orange), P95 at 403ms (red-orange), and P99 at
+    461ms (crimson). Each percentile has both horizontal and vertical dashed lines
+    intersecting at the percentile point, with labeled annotations in white boxes
+    with colored borders. The title "histogram-cumulative · seaborn · pyplots.ai"
+    appears at the top. The grid is subtle with alpha transparency.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; percentile annotations are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step histogram with fill and linewidth 2.5 is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow/orange/red gradient for percentiles is colorblind-friendly
+          with distinct luminance
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with right-side whitespace after
+          data ends at ~500ms
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)", "Cumulative
+          Proportion"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend for the percentile lines
+          (annotations serve as inline legend, acceptable but not ideal)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct cumulative histogram with step function display
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis is the numeric variable, Y-axis is cumulative proportion (0-1)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows cumulative count/proportion, monotonically increasing, step
+          display, practical percentile markers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-600ms range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Inline percentile annotations are accurate and descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-cumulative · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows the S-curve characteristic of cumulative histograms well, demonstrates
+          percentile reading use case, but could show more distinct distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Web API response times are a perfect real-world scenario for cumulative
+          histograms (SLA monitoring, percentile tracking)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 5-600ms are realistic; the tri-modal distribution
+          (fast/moderate/slow) is plausible for real API traffic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API (histplot with cumulative=True)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of seaborn's `histplot` with `cumulative=True`, `stat="proportion"`,
+          and `element="step"` parameters - these are seaborn-specific features that
+          simplify cumulative histogram creation
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/altair.yaml b/plots/histogram-density/metadata/altair.yaml
index 733c6b4751..b58f4921dd 100644
--- a/plots/histogram-density/metadata/altair.yaml
+++ b/plots/histogram-density/metadata/altair.yaml
@@ -23,3 +23,174 @@ review:
   - Missing legend to identify the KDE line (yellow line has no label)
   - Could use Altair's built-in transform_density() instead of scipy for more idiomatic
     implementation
+  image_description: The plot displays a density histogram with blue rectangular bars
+    (#306998 color with darker blue stroke) showing the distribution of reaction times
+    in milliseconds. The x-axis is labeled "Reaction Time (ms)" ranging from approximately
+    100 to 540, and the y-axis shows "Density (probability per ms)" ranging from 0
+    to 0.0120. A yellow KDE (kernel density estimate) line (#FFD43B) smoothly overlays
+    the histogram bars, clearly showing a bimodal distribution with a primary peak
+    around 250ms and a secondary smaller peak around 380ms. The title "histogram-density
+    · altair · pyplots.ai" appears at the top center. The plot has a clean white background
+    with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars have good visibility with stroke outlines; KDE line is prominent
+          with 4px width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue bars and yellow line provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization, plot fills majority of space; minor: left
+          margin slightly wider than necessary'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Reaction Time (ms)" and "Density (probability
+          per ms)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend for KDE line
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram with normalized y-axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X correctly shows continuous variable, Y shows density
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Density normalization present, KDE overlay added as per spec suggestion
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate axis scaling
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend for KDE line to identify what the yellow line represents
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-density · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Bimodal distribution excellently demonstrates density histogram;
+          shows both peaks clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction times from baseline vs fatigued conditions - realistic cognitive
+          psychology scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 250ms and 380ms reaction times are realistic for cognitive tasks
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure maintained
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_rect for histogram bars, layered chart composition, tooltips
+          enabled; could have used alt.Chart().transform_density() for more idiomatic
+          Altair
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/bokeh.yaml b/plots/histogram-density/metadata/bokeh.yaml
index 3fe56d06c8..2f7efa8d69 100644
--- a/plots/histogram-density/metadata/bokeh.yaml
+++ b/plots/histogram-density/metadata/bokeh.yaml
@@ -25,3 +25,173 @@ review:
   - Does not leverage Bokeh interactive capabilities like hover tooltips to show bin
     values
   - Minor empty space on the right side of the plot
+  image_description: 'The plot displays a density histogram of test scores with blue
+    histogram bars (color #306998) showing the empirical distribution. The bars have
+    white borders and slight transparency (alpha 0.7). A yellow/gold smooth curve
+    (Normal PDF with μ=75, σ=12) overlays the histogram. The title "histogram-density
+    · bokeh · pyplots.ai" appears at the top left. The x-axis is labeled "Test Score"
+    (ranging from ~40 to ~120), and the y-axis is labeled "Density (Probability per
+    Unit)" (ranging from 0 to ~0.035). A legend in the top-left shows "Empirical Distribution"
+    and "Normal PDF (μ=75, σ=12)". The grid is subtle with dashed lines. The plot
+    uses a 16:9 landscape format at 4800×2700 pixels.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable, though at full resolution
+          the legend text could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars and PDF line are clearly visible with good sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though histogram is slightly left-heavy
+          with empty space on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Test Score" and "Density (Probability
+          per Unit)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed styling and alpha=0.3, but legend text
+          is quite small and hard to read at full resolution
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram with PDF overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, density on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes density normalization and theoretical PDF overlay as suggested
+          in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: histogram-density · bokeh · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows bell-curve distribution with natural variation, demonstrates
+          density normalization well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores with μ=75, σ=12 is a plausible educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 observations, scores in 40-120 range with μ=75 is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic Bokeh plotting (quad, line) but doesn't leverage interactive
+          features like hover tooltips, which would showcase Bokeh's strengths
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/highcharts.yaml b/plots/histogram-density/metadata/highcharts.yaml
index d562dbfda2..0c395d068e 100644
--- a/plots/histogram-density/metadata/highcharts.yaml
+++ b/plots/histogram-density/metadata/highcharts.yaml
@@ -23,3 +23,173 @@ review:
     appearance
   - Legend positioned in upper right could potentially overlap with tall bars in some
     datasets
+  image_description: The plot displays a density histogram showing test score distribution
+    with a theoretical normal PDF overlay. The histogram uses **blue/teal colored
+    bars** (#306998) representing empirical density, with approximately 25 bins spanning
+    test scores from ~30 to ~126 on the x-axis. A smooth **yellow area spline curve**
+    (#FFD43B) overlays the histogram representing the theoretical normal distribution.
+    The y-axis shows probability density from 0 to ~0.037. The title "histogram-density
+    · highcharts · pyplots.ai" is prominently displayed at the top, with a subtitle
+    "Test Score Distribution with Normal PDF Overlay" below it. A legend in the upper
+    right corner identifies "Empirical Density" (blue) and "Normal PDF" (yellow).
+    The distribution is clearly bell-shaped, centered around 75-78, demonstrating
+    a good fit between the empirical histogram and theoretical PDF.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, axis titles at 36px, tick labels at 28px - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars well-sized, PDF curve clearly visible with good line
+          width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow colorblind-safe palette used
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Test Score" and "Probability Density" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (dashed lines), but y-axis has excessive tick marks
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram with PDF overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows test scores, Y shows probability density
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes density normalization, theoretical PDF overlay as recommended
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Empirical Density and Normal PDF
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: histogram-density · highcharts · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full distribution shape, demonstrates density normalization,
+          includes theoretical overlay
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Test scores are a plausible scenario, though somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores centered at 75 with std of 12, realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Effectively uses ColumnSeries for histogram, AreaSplineSeries for
+          smooth PDF curve, proper Highcharts styling with custom margins and font
+          sizes
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/letsplot.yaml b/plots/histogram-density/metadata/letsplot.yaml
index d4e84ae3e4..43ee12a266 100644
--- a/plots/histogram-density/metadata/letsplot.yaml
+++ b/plots/histogram-density/metadata/letsplot.yaml
@@ -26,3 +26,176 @@ review:
   - A legend explaining the yellow line as Fitted Normal Distribution would add clarity
   - Could leverage more distinctive lets-plot features like scale_fill_* or tooltip
     configuration
+  image_description: The plot displays a density histogram of test scores. Blue bars
+    (#306998) with white borders represent the histogram bins showing the distribution
+    of scores from approximately 40-105 on the X-axis, with density values (0-0.042)
+    on the Y-axis. A smooth yellow/gold (#FFD43B) theoretical normal distribution
+    curve overlays the histogram, showing a fit centered around 75. The distribution
+    is slightly right-skewed with a visible bump around 85-90 representing high performers.
+    The title reads "histogram-density · letsplot · pyplots.ai". The background is
+    white with subtle gray grid lines. Text is clearly readable with appropriate font
+    sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars well-sized with good alpha (0.7), PDF line visible at size 2.5
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight excess whitespace on right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '''Test Score'' and ''Density'' are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3); no legend is strictly needed, but one
+          would help explain the PDF overlay
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows test scores, Y-axis shows density (using `..density..`)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: density normalization, theoretical PDF overlay
+          for goodness of fit'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range of data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, but data elements are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "histogram-density · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows bimodal distribution (main group + high performers), demonstrates
+          density normalization well. Minor: could show clearer separation between
+          groups'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores are a realistic, neutral, comprehensible scenario perfect
+          for density visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 0-100 are perfect for test scores, density scale is appropriate,
+          400 observations is good sample size
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, lets_plot, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' with path='.' which works, but minor point
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with `geom_histogram`, `..density..`
+          stat, `theme_minimal()`, and `ggsize()`. Good use of grammar of graphics
+          but doesn't leverage more distinctive lets-plot features like interactivity
+          configuration or color scales
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/matplotlib.yaml b/plots/histogram-density/metadata/matplotlib.yaml
index c7cd31e92f..619d7f5842 100644
--- a/plots/histogram-density/metadata/matplotlib.yaml
+++ b/plots/histogram-density/metadata/matplotlib.yaml
@@ -23,3 +23,172 @@ review:
   - Could use more distinctive matplotlib features like fill_between for PDF shading
     or axvline for mean
   - Legend positioned in upper left slightly overlaps with potential data area
+  image_description: 'The plot displays a density histogram with blue bars representing
+    test score distribution (labeled "Observed Distribution"). The x-axis shows "Test
+    Score (points)" ranging from 30 to 110, while the y-axis shows "Probability Density"
+    ranging from 0.000 to 0.040. A yellow/gold curve overlays the histogram showing
+    the theoretical Normal PDF (μ=75, σ=12). The histogram bars have white edges and
+    semi-transparent fill (alpha=0.7). The legend is positioned in the upper left
+    corner. A subtle grid with dashed lines is visible. The title follows the required
+    format: "histogram-density · matplotlib · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars clearly visible, PDF line has good linewidth (3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "Test Score (points)", Y-axis is descriptive "Probability
+          Density"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is 0.3 (acceptable), but legend fontsize should match
+          other text proportionally
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram with density=True
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to histogram
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Density histogram with PDF overlay as suggested in spec notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes both elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "histogram-density · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows normal distribution shape well, but could demonstrate more
+          distributional features (e.g., slight asymmetry from clipping)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores are a perfect, neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores in 0-100 range with mean of 75 and SD of 12 are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic matplotlib features (hist with density, plot), but could
+          leverage more distinctive features like fill_between for PDF area, or axvline
+          for mean indicator
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/plotly.yaml b/plots/histogram-density/metadata/plotly.yaml
index da953e1074..fd08cac3aa 100644
--- a/plots/histogram-density/metadata/plotly.yaml
+++ b/plots/histogram-density/metadata/plotly.yaml
@@ -25,3 +25,177 @@ review:
   - Bimodal distribution could be more visually distinct to better showcase density
     histogram capabilities
   - Could add hover templates to enhance Plotly interactive features
+  image_description: 'The plot displays a density histogram with blue bars (hex #306998)
+    representing test scores ranging from approximately 35 to 115 points. The y-axis
+    shows density (probability per unit) ranging from 0 to about 0.032. A smooth yellow/gold
+    KDE (Kernel Density Estimate) curve overlays the histogram, showing the continuous
+    density approximation. The distribution appears slightly bimodal with a main peak
+    around 72-75 points and a secondary shoulder around 85-88 points. The title "histogram-density
+    · plotly · pyplots.ai" is centered at the top. The legend in the upper right corner
+    shows "Test Scores" for the histogram and "Density Curve (KDE)" for the line.
+    The background is white with subtle gray gridlines. All text is clearly legible
+    with good font sizing.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars clearly visible with good opacity (0.75), KDE line
+          is thick (width=4) and prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Test Score (points)"
+          and "Density (probability per unit)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), legend well placed with background; however
+          grid could be slightly more visible at alpha 0.2-0.3
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram with histnorm="probability density"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores correctly mapped to x-axis, density to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has density normalization, KDE overlay as suggested in spec Notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate padding on x-axis
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels are accurate and descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution demonstrating density histogram's ability
+          to reveal distribution shape; however the bimodality is subtle and could
+          be more pronounced
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores are a realistic, neutral, and relatable context for educational
+          data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores 40-100+ are realistic for an exam scenario
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, plotly.graph_objects, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Histogram with histnorm, go.Scatter for KDE overlay, plotly_white
+          template, interactive HTML export; however could leverage more Plotly-specific
+          features like hover templates
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/plotnine.yaml b/plots/histogram-density/metadata/plotnine.yaml
index 6e66c5f0ca..85fd06fdc3 100644
--- a/plots/histogram-density/metadata/plotnine.yaml
+++ b/plots/histogram-density/metadata/plotnine.yaml
@@ -22,3 +22,173 @@ review:
   - Uses deprecated ..density.. syntax instead of modern after_stat(density)
   - Grid styling incorrectly uses element_text for panel_grid_major_y (should be element_line)
   - Axis labels lack units where applicable
+  image_description: The plot displays a density histogram showing a bimodal distribution
+    of test scores. Blue bars (#306998) with white edges represent the histogram bins,
+    normalized to show density on the y-axis. A smooth yellow/gold density curve (#FFD43B)
+    is overlaid on the histogram, clearly showing two peaks around scores of 65 and
+    85. The x-axis is labeled "Test Score" ranging from approximately 40 to 100, and
+    the y-axis shows "Density" from 0.00 to about 0.035. The title correctly reads
+    "histogram-density · plotnine · pyplots.ai". The plot uses a minimal theme with
+    subtle grid lines and good use of whitespace.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars and density curve are clearly visible; alpha=0.7 works
+          well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe and provides excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight margin imbalance on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (density is unitless, but "Test Score"
+          could include "(points)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; no legend needed but grid styling uses element_text
+          incorrectly (should use element_line)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, density on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Density normalization present, PDF overlay included as suggested
+          in spec notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "histogram-density · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution excellently demonstrates density histogram capabilities
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Test scores are plausible but somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores in realistic 40-100 range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses deprecated `..density..` syntax; modern plotnine prefers `after_stat(density)`
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_histogram and geom_density overlay,
+          theme_minimal, but could leverage more plotnine-specific features like scale
+          adjustments
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/pygal.yaml b/plots/histogram-density/metadata/pygal.yaml
index 27aa5d9441..1d8a52ad6b 100644
--- a/plots/histogram-density/metadata/pygal.yaml
+++ b/plots/histogram-density/metadata/pygal.yaml
@@ -25,3 +25,182 @@ review:
     which is not implemented
   - Could use pygal native Histogram chart type instead of Bar chart for more semantic
     correctness
+  image_description: The plot displays a density histogram with blue bars on a white
+    background. The title "histogram-density · pygal · pyplots.ai" appears at the
+    top. The x-axis is labeled "Test Score" with values ranging from approximately
+    40 to 95, with labels shown at intervals (40, 52, 64, 76, 88). The y-axis is labeled
+    "Density (Probability per Unit)" with values from 0 to approximately 0.034. The
+    histogram clearly shows a bimodal distribution with two peaks - one around score
+    65 and another around score 80-82, which aligns with the generated bimodal test
+    score data. Horizontal grid lines are visible at subtle intervals. The bars are
+    rendered in Python Blue (#306998) with slight transparency.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable. Font
+          sizes are well-scaled for the 4800x2700 canvas. Slight deduction as y-axis
+          tick labels are somewhat small.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. X-axis labels are spaced appropriately
+          by showing every 5th label.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with good sizing and appropriate opacity
+          (0.85).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) with good contrast against white
+          background. No colorblind issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though margins could be slightly optimized.
+          Plot fills reasonable portion of canvas.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Test Score" and "Density
+          (Probability per Unit)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Y-axis grid is visible but no legend present (acceptable since single
+          series with show_legend=False). Grid is subtle.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: density histogram using bar chart with density-normalized
+          values.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows bin centers (test scores), Y-axis shows density values.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows density (area=1 normalization), continuous variable distribution,
+          appropriate binning.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range from ~40-95 scores.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately for single series histogram.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-density · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution demonstrating density normalization well.
+          Could have included a reference line or PDF overlay as suggested in spec
+          notes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores is a realistic, neutral educational context. Bimodal
+          distribution represents two groups of students plausibly.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores clipped to 0-100 range, realistic normal distributions with
+          sensible means (65 and 82) and standard deviations.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pygal, Style.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization, Bar chart with custom tooltips
+          via label dictionaries. Could leverage more pygal-specific features like
+          built-in tooltips or animations.
+  verdict: APPROVED
diff --git a/plots/histogram-density/metadata/seaborn.yaml b/plots/histogram-density/metadata/seaborn.yaml
index 598054c907..2a69e91060 100644
--- a/plots/histogram-density/metadata/seaborn.yaml
+++ b/plots/histogram-density/metadata/seaborn.yaml
@@ -23,3 +23,172 @@ review:
   weaknesses:
   - KDE line is slightly thick (linewidth=4) which could overshadow the histogram
     bars in some areas
+  image_description: The plot displays a density histogram of test scores using blue
+    bars (#306998) with white edges. The histogram shows a clear bimodal distribution
+    with peaks around 65 and 85 points, representing two student groups. A yellow/golden
+    KDE curve (Kernel Density Estimate) smoothly overlays the histogram, tracing the
+    density pattern. The title reads "histogram-density · seaborn · pyplots.ai" in
+    large font at the top. The x-axis is labeled "Test Score (points)" and the y-axis
+    "Probability Density". A legend in the upper left identifies the KDE curve and
+    Density Histogram. The plot has a subtle dashed grid and uses a clean 16:9 layout
+    with good canvas utilization.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths and alpha well-adapted for data density, KDE line clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills ~70% of canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Test Score (points)", "Probability Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3, dashed), legend well placed in upper left
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct density histogram with stat="density"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows test scores, Y-axis shows probability density
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Density histogram with KDE overlay as reference line (spec suggests
+          adding reference/theoretical PDF overlay)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible (20-100 range covers full distribution)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies histogram and KDE
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Title format is correct: "histogram-density · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution excellently demonstrates density normalization
+          and shows varied distribution shape
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores with two student groups (average and high performers)
+          is a plausible educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores 0-100, realistic means (65, 85) and standard deviations
+          (10, 5)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of sns.histplot with stat="density" and sns.kdeplot
+          for smooth overlay, leveraging seaborn's statistical visualization strengths
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/altair.yaml b/plots/histogram-kde/metadata/altair.yaml
index 590882e82b..160ab20c4b 100644
--- a/plots/histogram-kde/metadata/altair.yaml
+++ b/plots/histogram-kde/metadata/altair.yaml
@@ -24,3 +24,175 @@ review:
     on the left
   - No subtle background grid to help read density values
   - Missing axis units (could be Test Score points and Density probability)
+  image_description: The plot displays a histogram with KDE overlay showing a bimodal
+    distribution of test scores. The histogram bars are rendered in semi-transparent
+    blue (#306998 with opacity 0.5), ranging from approximately 20 to 100 on the x-axis
+    labeled "Test Score". The y-axis shows "Density" values from 0.000 to 0.034. A
+    smooth yellow/gold KDE curve (#FFD43B) overlays the histogram, clearly showing
+    two peaks - a larger peak around 45 and a smaller peak around 72, demonstrating
+    the bimodal nature of the data. The title "histogram-kde · altair · pyplots.ai"
+    is displayed at the top center. The plot has good proportions with clean axis
+    labels and tick marks that are clearly readable.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE line clearly visible; KDE strokeWidth=4 stands
+          out well against the bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe and has excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; minor issue with excessive x-axis range
+          showing 0-12 where no data exists
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Test Score" and "Density" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No visible grid or legend (no legend needed for this plot type, but
+          subtle grid would improve readability)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, density on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent bars (0.5 alpha), density-scaled y-axis, KDE curve
+          overlay - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "histogram-kde · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution excellently demonstrates KDE smoothing vs histogram
+          binning
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Test scores are plausible scenario; bimodal could represent two class
+          sections
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores 20-100 with 500 observations is realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (altair, numpy, pandas, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly, but minor redundancy
+          in code structure
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layered chart composition and encoding system well,
+          but could leverage tooltips or interactive features
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/bokeh.yaml b/plots/histogram-kde/metadata/bokeh.yaml
index aa9f8e007a..03438fcab7 100644
--- a/plots/histogram-kde/metadata/bokeh.yaml
+++ b/plots/histogram-kde/metadata/bokeh.yaml
@@ -26,3 +26,176 @@ review:
   - Legend is too small and positioned far from the main data area
   - Missing Bokeh-specific interactive features like HoverTool to show density values
     on hover
+  image_description: The plot displays a histogram with KDE overlay showing "Daily
+    Return (%)" on the x-axis and "Density" on the y-axis. The histogram bars are
+    rendered in a semi-transparent blue color (#306998 with alpha 0.5), and the KDE
+    curve is a yellow/gold line (#FFD43B) overlaid on top. The title "histogram-kde
+    · bokeh · pyplots.ai" appears in the top-left corner. The legend in the top-right
+    corner shows "Histogram" and "KDE" entries. The distribution is roughly centered
+    around 0% with visible fat tails on both sides (negative around -10% and positive
+    around +10-15%), representing realistic stock return behavior. Grid lines are
+    dashed and subtle. The background is a light gray (#fafafa).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable, slightly smaller than optimal for tick labels
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE curve are clearly visible; KDE line width
+          of 5 is appropriate; histogram alpha of 0.5 allows KDE visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue histogram and yellow KDE line provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Return (%)" and "Density" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend is tiny and positioned in
+          top-right corner far from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to histogram bins and KDE curve
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent bars, density-scaled y-axis, KDE curve - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range from about -12 to +17
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Histogram" and "KDE"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "histogram-kde · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Data shows main distribution plus fat tails on both sides, demonstrating
+          KDE's ability to smooth over binning artifacts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock returns with normal market conditions and tail events is a
+          real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Daily returns in percentage terms (-10% to +15%) are realistic for
+          financial data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → histogram computation → KDE computation
+          → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Does not leverage Bokeh's interactive hover tools or tooltips that
+          could show bin values/density on hover; this is a missed opportunity for
+          Bokeh's strengths
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/highcharts.yaml b/plots/histogram-kde/metadata/highcharts.yaml
index eeb980794c..7506bc5191 100644
--- a/plots/histogram-kde/metadata/highcharts.yaml
+++ b/plots/histogram-kde/metadata/highcharts.yaml
@@ -22,3 +22,180 @@ review:
   weaknesses:
   - Grid lines could be slightly more visible (alpha 0.2-0.3 instead of 0.1)
   - Could add hover tooltips to leverage Highcharts interactivity strengths
+  image_description: The plot displays a histogram with KDE overlay showing simulated
+    daily stock returns. The histogram bars are semi-transparent blue (#306998 with
+    0.6 alpha) with darker blue borders. The KDE curve is rendered as a smooth yellow/golden
+    area spline (#FFD43B) with light yellow fill underneath. The title "histogram-kde
+    · highcharts · pyplots.ai" is prominently displayed at the top with a subtitle
+    "Simulated Daily Stock Returns (%)". The x-axis shows "Return (%)" ranging from
+    approximately -9 to 10.5, and the y-axis shows "Density" from 0 to 0.17. The legend
+    in the upper right identifies "Histogram" and "KDE" series. The distribution shows
+    a slight left skew with the main peak around -1.5 to 0, and a secondary concentration
+    around -3 reflecting the bimodal data generation.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable.
+          Slightly smaller than optimal for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced and clear.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Histogram bars and KDE curve are well-sized. The semi-transparent
+          bars allow the KDE to show through nicely. Minor: some bars appear slightly
+          thin relative to bin width.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, providing excellent contrast.
+          No red-green conflicts.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with appropriate margins. Plot area is well-proportioned.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Return (%)" with units, Y-axis has "Density" without
+          units (density is unitless, so acceptable).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend is well-placed. Grid could be
+          slightly more visible.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements histogram with KDE overlay.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, density on y-axis.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: histogram bars, KDE curve, density
+          scaling.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Histogram" and "KDE".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "histogram-kde · highcharts · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution demonstrating KDE's ability to capture
+          underlying shape. Could show more extreme tails or outliers.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock returns is an excellent realistic context for histogram-KDE
+          visualization.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Return values are realistic (-9% to +10% range). 500 data points
+          is appropriate.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API patterns.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (minor - both files
+          are acceptable for highcharts).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses AreaSplineSeries for smooth KDE curve and ColumnSeries for histogram.
+          Could leverage more Highcharts-specific features like tooltips or hover
+          effects.
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/letsplot.yaml b/plots/histogram-kde/metadata/letsplot.yaml
index 76be4a671d..43d0b1e90f 100644
--- a/plots/histogram-kde/metadata/letsplot.yaml
+++ b/plots/histogram-kde/metadata/letsplot.yaml
@@ -26,3 +26,175 @@ review:
   weaknesses:
   - Output path uses Path(__file__).parent which saves to implementations directory;
     should save to working directory as plot.png
+  image_description: The plot displays a histogram with KDE overlay showing simulated
+    daily stock returns. The histogram bars are semi-transparent blue (#306998) with
+    darker blue borders, ranging from approximately -9% to +10% on the x-axis. The
+    y-axis displays "Density" from 0 to 0.26. A bright yellow/gold KDE curve (#FFD43B)
+    smoothly overlays the histogram, revealing the underlying probability density.
+    The distribution is roughly bell-shaped but shows heavier tails and slight asymmetry,
+    consistent with leptokurtic financial returns. The title "histogram-kde · letsplot
+    · pyplots.ai" appears at the top. The minimal grid with subtle gray lines provides
+    good readability without distraction.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE curve are well-sized; KDE line thickness (size=2)
+          is good but could be slightly thicker for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe and provides clear distinction
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Return (%)" and "Density" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (though not strictly necessary for this plot type,
+          having one labeling "Histogram" and "KDE" would improve clarity)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly shows the continuous variable
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has histogram bars, KDE overlay, density scale, semi-transparent
+          bars (alpha=0.5), contrasting colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this single-variable plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "histogram-kde · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows distribution shape, tails, and central tendency well; the leptokurtic
+          nature is visible but could show slightly more pronounced tail events
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock daily returns is a perfect real-world scenario for histogram+KDE
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for daily returns (-9% to +10%), though most
+          returns cluster between -3% and +3%
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to implementations directory instead of current working directory
+          as `plot.png`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_histogram with density stat, geom_density,
+          theme customization, but doesn't leverage lets-plot specific interactive
+          features or advanced theming
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/matplotlib.yaml b/plots/histogram-kde/metadata/matplotlib.yaml
index c5d2aa428a..a8fa47aac0 100644
--- a/plots/histogram-kde/metadata/matplotlib.yaml
+++ b/plots/histogram-kde/metadata/matplotlib.yaml
@@ -28,3 +28,176 @@ review:
     background)
   - Manual KDE implementation, while correct, does not leverage scipy.stats.gaussian_kde
     which would be more robust
+  image_description: The plot displays a histogram with KDE (Kernel Density Estimate)
+    overlay showing daily stock returns as percentages. The histogram bars are rendered
+    in a muted blue color (#306998) with semi-transparency (alpha ~0.5) and darker
+    blue edges. A smooth yellow/gold KDE curve (#FFD43B) overlays the histogram with
+    a thick linewidth, clearly visible against the bars. The x-axis is labeled "Daily
+    Return (%)" ranging from approximately -10% to +10%, and the y-axis shows "Density"
+    from 0.00 to ~0.26. The title follows the correct format "histogram-kde · matplotlib
+    · pyplots.ai". A legend in the upper right clearly identifies "Histogram" and
+    "KDE". The distribution shows a roughly normal shape centered near 0, with some
+    fat tails extending to extreme values, consistent with realistic financial return
+    data. The plot has a subtle dashed grid.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE line both clearly visible; KDE linewidth=4
+          is appropriate; bars have good alpha for transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned properly
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Return (%)" with units, "Density" is appropriate for y-axis'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3, dashed), legend well placed but could have
+          frame
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Single continuous variable correctly mapped to x-axis, density on
+          y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has semi-transparent bars (~0.5 alpha), density scaling, KDE curve,
+          contrasting colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range displayed from -10% to +10%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Histogram" and "KDE"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format `histogram-kde · matplotlib · pyplots.ai`
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Data shows fat-tailed distribution with mixture of normal and volatile
+          periods, demonstrates both histogram binning and smooth KDE curve
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock daily returns is a perfect real-world use case for histogram-KDE
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Returns centered near 0% with ±10% range is realistic for daily stock
+          returns
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses appropriate matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Manual KDE implementation is educational but could have used scipy.stats.gaussian_kde
+          or seaborn's kdeplot for more robust estimation. The manual implementation
+          using Scott's rule is correct and demonstrates understanding.
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/plotly.yaml b/plots/histogram-kde/metadata/plotly.yaml
index 3472395d06..453b334e30 100644
--- a/plots/histogram-kde/metadata/plotly.yaml
+++ b/plots/histogram-kde/metadata/plotly.yaml
@@ -23,3 +23,163 @@ review:
   weaknesses:
   - Legend background could be slightly more transparent (0.7 vs 0.8 opacity)
   - Grid alpha very subtle at 0.1 - could be 0.2-0.3 for better visual guidance
+  image_description: The plot displays a histogram with KDE overlay visualizing daily
+    stock returns. The histogram uses semi-transparent blue bars (#306998 with 50%
+    opacity) showing return distribution from approximately -3% to +3%. A bright yellow/gold
+    KDE curve (#FFD43B) smoothly overlays the histogram, revealing the underlying
+    probability density. The title "histogram-kde · plotly · pyplots.ai" is centered
+    at the top. X-axis labeled "Daily Return (%)", y-axis labeled "Density". Legend
+    in upper right corner shows "Histogram" and "KDE" entries with a semi-transparent
+    white background. Clean white template with subtle gridlines and a zero-line on
+    the x-axis.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title ~32pt, labels ~24pt, ticks ~18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE curve well-sized for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow have good contrast; minor deduction as yellow could
+          be slightly more saturated
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent layout with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha 0.1, legend well-placed but background slightly
+          large
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Returns on x-axis, density on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent bars (~0.5 alpha), density-scaled y-axis, smooth
+          KDE, contrasting colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range shown with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "histogram-kde · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows negative skew, heavier left tail, asymmetric distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock returns scenario realistic with 550 observations
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Daily returns (-3% to +3%) realistic for stocks
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/plotnine.yaml b/plots/histogram-kde/metadata/plotnine.yaml
index 981bdc652b..28f642ccdb 100644
--- a/plots/histogram-kde/metadata/plotnine.yaml
+++ b/plots/histogram-kde/metadata/plotnine.yaml
@@ -21,3 +21,160 @@ review:
   weaknesses:
   - Grid lines are nearly invisible (panel_grid_major alpha=0.3 combined with light
     color makes them hard to see)
+  image_description: The plot shows a histogram with KDE overlay visualizing stock
+    daily returns. The histogram bars are a semi-transparent steel blue (#306998)
+    with darker blue borders, allowing the bright yellow (#FFD43B) KDE curve to be
+    clearly visible overlaid on top. The distribution is centered around 0% with a
+    slight negative skew and fat tails extending from approximately -10% to +7%. The
+    x-axis is labeled "Daily Return (%)" and the y-axis shows "Density" ranging from
+    0.0 to 0.3. The title "histogram-kde · plotnine · pyplots.ai" appears at the top.
+    The layout uses a minimal theme with a clean white background and very subtle
+    grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text perfectly readable with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE line optimally sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is nearly invisible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned with density scaling
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All spec features present (alpha, density scale, contrasting colors)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: histogram-kde · plotnine · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows realistic distribution with skewness and fat tails
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock returns with varied market conditions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic percentage values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/pygal.yaml b/plots/histogram-kde/metadata/pygal.yaml
index 9b24548894..23156e11d7 100644
--- a/plots/histogram-kde/metadata/pygal.yaml
+++ b/plots/histogram-kde/metadata/pygal.yaml
@@ -25,3 +25,180 @@ review:
   weaknesses:
   - Grid lines are somewhat prominent and could use subtle alpha adjustment
   - KDE curve stroke width could be slightly more prominent against the filled area
+  image_description: The plot displays a histogram with KDE overlay visualizing stock
+    return distributions. The histogram is rendered in semi-transparent blue (#306998)
+    using a step-like XY path approach, with bars showing frequency counts across
+    25 bins spanning from approximately -5% to +6% daily returns. A smooth red/pink
+    KDE curve overlays the histogram, following the distribution shape. The title
+    "histogram-kde · pygal · pyplots.ai" appears at the top. The X-axis is labeled
+    "Daily Return (%)" and the Y-axis shows "Probability Density" with values from
+    0 to ~0.27. A legend at the bottom identifies "Histogram" and "KDE Curve". The
+    distribution shows a main peak centered near 0%, with visible left tail (market
+    drops) and right tail (market rallies) demonstrating realistic stock return behavior.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable; font sizes
+          appropriately scaled for 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE curve are visible; KDE line could be slightly
+          thicker for better visibility against the filled histogram
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue histogram and red KDE curve provide excellent contrast; colorblind-safe
+          palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins; legend well-positioned
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Daily Return (%)" and "Probability
+          Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are present but the vertical guide lines are somewhat
+          prominent; legend placement is good
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows return values, Y-axis shows probability density correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both histogram bars and smooth KDE curve present as required
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range including tails
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Histogram" and "KDE Curve"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-kde · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows main distribution plus left and right tails; demonstrates skewness
+          and tail behavior well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock returns is a perfect realistic scenario for histogram-KDE;
+          values are plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Daily returns in the -5% to +6% range are realistic; main distribution
+          centered near 0.05% is sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → histogram computation →
+          KDE computation → chart creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic output
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pygal, Style'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as both plot.html and plot.png (minor issue, but spec expects
+          plot.png)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of pygal's XY chart with custom Style, fill options, and
+          stroke_style; however, doesn't leverage any particularly unique pygal features
+          like tooltips or animations
+  verdict: APPROVED
diff --git a/plots/histogram-kde/metadata/seaborn.yaml b/plots/histogram-kde/metadata/seaborn.yaml
index 623157a13e..ecc6c14f1f 100644
--- a/plots/histogram-kde/metadata/seaborn.yaml
+++ b/plots/histogram-kde/metadata/seaborn.yaml
@@ -22,3 +22,171 @@ review:
   - Clean visual design with removed top/right spines
   weaknesses:
   - No legend to explicitly identify the histogram bars vs KDE curve
+  image_description: 'The plot displays a histogram with KDE overlay showing daily
+    stock returns (%). The histogram uses blue semi-transparent bars (#306998) with
+    white edges, showing the frequency distribution of returns centered around 0%.
+    A smooth yellow/gold KDE curve (#FFD43B) overlays the histogram, highlighting
+    the underlying probability density. The x-axis shows "Daily Return (%)" ranging
+    from approximately -8 to +8, and the y-axis shows "Density" ranging from 0.00
+    to 0.35. The title follows the correct format: "histogram-kde · seaborn · pyplots.ai".
+    The distribution is roughly bell-shaped with slight left skewness, visible tail
+    behavior on both ends, and the KDE smoothly traces the shape of the histogram
+    bars. Top and right spines are removed for a cleaner look, and a subtle dashed
+    grid is present.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars and KDE line clearly visible, appropriate alpha (0.5)
+          for bars
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue histogram and yellow KDE line provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Return (%)" includes units, "Density" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but no legend present (though not strictly
+          required here as colors are self-evident)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram with KDE overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous values correctly mapped to x-axis, density on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent bars, density scaling, KDE overlay - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of data visible including tails
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present to distinguish histogram vs KDE
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: histogram-kde · seaborn · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows distribution shape, skewness, tail behavior, and central tendency
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock returns is a perfect real-world scenario for this plot type
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Daily returns of -8% to +8% with most values in ±2% is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses sns.histplot with stat="density" and sns.kdeplot for separate
+          control of histogram and KDE styling
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/altair.yaml b/plots/histogram-overlapping/metadata/altair.yaml
index 5044ded983..8d348dad10 100644
--- a/plots/histogram-overlapping/metadata/altair.yaml
+++ b/plots/histogram-overlapping/metadata/altair.yaml
@@ -20,4 +20,182 @@ review:
   - Tooltips enhance interactivity when viewed as HTML
   - Proper use of stack=None to achieve true overlapping rather than stacked bars
   - Grid styling is subtle and non-distracting with dashed lines
-  weaknesses: []
+  weaknesses:
+  - 'Grid styling score should be 2/2 not 0/2 (correcting evaluation: grid is excellent)'
+  image_description: 'The plot displays three overlapping histograms showing employee
+    response times (in milliseconds) by department. The chart uses a 16:9 aspect ratio
+    with the title "histogram-overlapping · altair · pyplots.ai" centered at the top.
+    Three departments are represented with distinct, semi-transparent colors: Engineering
+    (blue, #306998), Sales (yellow, #FFD43B), and Support (green, #4CAF50). The x-axis
+    shows "Response Time (ms)" ranging from approximately 50 to 850, and the y-axis
+    shows "Frequency" ranging from 0 to 50. A legend on the right side clearly identifies
+    each department with colored squares. The distributions are clearly visible with
+    overlapping regions showing blended colors due to the 0.5 opacity. Engineering
+    peaks around 350ms, Sales peaks around 450ms, and Support peaks around 280ms.
+    Subtle dashed grid lines with low opacity enhance readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlapping anywhere; bars overlap as intended for the plot
+          type
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with appropriate opacity (0.5), though some
+          bars at the tails could be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Green palette is colorblind-friendly (avoids red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend appropriately
+          positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units ("Response Time (ms)",
+          "Frequency")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), legend well-placed with proper
+          sizing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = continuous variable (response time), Y = frequency, color = group
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills (0.5 opacity), distinct colors, legend present,
+          aligned bins across groups (stack=None), consistent bin widths
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible from ~50ms to ~850ms
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Engineering, Sales, Support
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "histogram-overlapping · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three overlapping distributions with different centers and
+          spreads; Support is tighter (scale=60), Sales is wider (scale=100), Engineering
+          in between (scale=80). Could show more distinct overlap scenarios
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee response times by department is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times of 280-420ms are plausible for system/application
+          response metrics, though slightly on the higher end
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only imports altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Leverages Altair's declarative encoding with proper type annotations
+          (:Q, :N), uses stack=None for overlapping, includes tooltips, proper mark_bar
+          with binSpacing=0, configure_axis for styling
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/bokeh.yaml b/plots/histogram-overlapping/metadata/bokeh.yaml
index e8e7c84a73..7ec61c79c0 100644
--- a/plots/histogram-overlapping/metadata/bokeh.yaml
+++ b/plots/histogram-overlapping/metadata/bokeh.yaml
@@ -25,3 +25,181 @@ review:
   - Legend positioning leaves excessive whitespace on the right side of the plot
   - Could use ColumnDataSource for more idiomatic Bokeh code
   - HoverTool could be customized to show bin range and count information
+  image_description: 'The plot displays three overlapping histograms showing employee
+    response times (in milliseconds) by department. The Engineering distribution (blue,
+    #306998) is centered around 250ms with moderate spread. The Sales distribution
+    (yellow/gold, #FFD43B) is centered around 320ms with wider spread, showing some
+    values extending to 600ms. The Support distribution (green, #4CAF50) is centered
+    around 280ms with tighter spread. All three histograms use semi-transparent fills
+    (alpha 0.5) allowing overlapping regions to be visible. The title "histogram-overlapping
+    · bokeh · pyplots.ai" appears at the top left. Axis labels show "Response Time
+    (ms)" on x-axis and "Frequency" on y-axis. A legend in the top right identifies
+    all three departments. The grid uses subtle dashed lines. Interactive Bokeh toolbar
+    is visible in the top right corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Histogram bars are well-sized with good transparency; slight deduction
+          as some overlapping regions are dense
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, green are distinguishable and reasonably colorblind-safe,
+          though green-yellow proximity could be slightly better
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Response Time (ms)", "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but legend is placed in top-right
+          with excessive empty space on the right side of the plot; legend could be
+          positioned better
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly binned, groups properly distinguished
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills, distinct colors, legend, aligned bins - all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Engineering, Sales, Support
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "histogram-overlapping · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distributions with different centers and spreads; overlapping
+          regions visible; slight deduction as distributions could show more distinct
+          separation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee response times by department is a plausible, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times in 100-600ms range are reasonable; slight deduction
+          as some extreme Sales values (>500ms) seem high
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: output_file/save is used but export_png already creates the
+          PNG'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's quad glyph appropriately, includes interactive tools
+          (pan, wheel_zoom, box_zoom, reset, hover), exports both PNG and HTML. Could
+          leverage ColumnDataSource or HoverTool customization for richer interactivity.
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/highcharts.yaml b/plots/histogram-overlapping/metadata/highcharts.yaml
index 4fa18550bd..0e06afc122 100644
--- a/plots/histogram-overlapping/metadata/highcharts.yaml
+++ b/plots/histogram-overlapping/metadata/highcharts.yaml
@@ -24,3 +24,178 @@ review:
     to the chart area
   - Y-axis label Frequency (Count) is slightly redundant
   - Image height is 2561px instead of target 2700px
+  image_description: |-
+    The plot displays three overlapping histograms showing employee performance score distributions across three departments. The chart uses semi-transparent column bars with:
+    - **Yellow** (Sales, n=150) - widest distribution, lower center
+    - **Purple** (Marketing, n=150) - middle layer
+    - **Blue** (Engineering, n=150) - highest concentration around 71-78 range
+
+    The title reads "histogram-overlapping · highcharts · pyplots.ai" with subtitle "Employee Performance Score Distribution by Department". X-axis shows "Performance Score" (ranging from 16 to 125), Y-axis shows "Frequency (Count)" (0-56). A boxed legend in the upper right identifies each department with sample sizes. Bars overlap correctly with transparency (~0.55 alpha), allowing visibility of all three distributions. The overlapping regions show blended colors where distributions intersect.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized and transparency works well; slight deduction
+          as some smaller bars at distribution tails are harder to see
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow, purple, and blue palette is colorblind-safe (no red-green
+          combination)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; slight excess whitespace at bottom margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Performance Score" and "Frequency (Count)" are descriptive with
+          context'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle (good), but legend placement in upper right with
+          large white box feels slightly disconnected from the data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram using column chart with grouping disabled
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows bin ranges, Y-axis shows frequency counts correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills, distinct colors, legend with group labels,
+          aligned bin edges
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies each department with sample sizes
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "histogram-overlapping · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows different central tendencies and spreads; Engineering peaks
+          higher, Sales has wider spread. Minor deduction: could show more dramatic
+          differences'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a realistic, relatable
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 0-100 range makes sense for performance; some outliers extend
+          to 125 which is slightly unusual for a 100-point scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → histogram computation
+          → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png ✓ but image dimensions are 4800x2561 instead of
+          4800x2700 (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses grouping:false for overlap effect, shared tooltips, proper Highcharts
+          column series configuration. Could leverage more interactive features like
+          hover states or data labels.
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/letsplot.yaml b/plots/histogram-overlapping/metadata/letsplot.yaml
index e3e541d9fb..ea3fb8990a 100644
--- a/plots/histogram-overlapping/metadata/letsplot.yaml
+++ b/plots/histogram-overlapping/metadata/letsplot.yaml
@@ -25,3 +25,179 @@ review:
   - Grid lines extend beyond data area, could be more contained
   - Could leverage more lets-plot distinctive features (e.g., interactive tooltips
     in HTML output)
+  image_description: 'The plot displays two overlapping histograms comparing response
+    times between Control (steel blue, #306998) and Treatment (golden yellow, #FFD43B)
+    groups. The Control distribution is centered around 450-480ms while the Treatment
+    distribution is centered around 350-400ms, clearly showing faster response times
+    for the treatment condition. The semi-transparent fills (alpha=0.5) create an
+    olive/greenish overlap region where distributions intersect, effectively demonstrating
+    the comparison. The title correctly follows the format "histogram-overlapping
+    · letsplot · pyplots.ai". Axis labels show "Response Time (ms)" on X and "Count"
+    on Y. A legend on the right identifies the two conditions. The plot uses a minimal
+    theme with subtle grid lines on a light background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes (24pt title, 20pt labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and ticks are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars are well-sized, alpha=0.5 transparency allows overlapping
+          regions to be visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, providing
+          excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though there's slightly more whitespace
+          on the right side due to legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Response Time (ms)", Y-axis
+          has "Count"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but legend title says "Condition"
+          while the data column is named "group" - minor inconsistency; legend could
+          be better integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram with semi-transparent bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis, groups distinguished by fill color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills (alpha=0.5), distinct contrasting colors,
+          legend present, aligned bin edges
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes appropriately scaled from ~150-700ms
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Control and Treatment groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "histogram-overlapping · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows overlapping distributions with clear difference in central
+          tendency, but distributions could show more variation in spread/shape
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response time comparison in A/B testing is an excellent, realistic
+          scenario mentioned in spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times of 300-600ms are realistic for cognitive/UI response
+          experiments
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used, specific imports from lets_plot
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly, but doesn't leverage lets-plot's distinctive
+          interactive features or advanced theming beyond basics
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/matplotlib.yaml b/plots/histogram-overlapping/metadata/matplotlib.yaml
index 4a2c735199..22166a8f1f 100644
--- a/plots/histogram-overlapping/metadata/matplotlib.yaml
+++ b/plots/histogram-overlapping/metadata/matplotlib.yaml
@@ -23,3 +23,172 @@ review:
   - Grid only on y-axis; adding subtle x-axis grid could improve value reading
   - Could use matplotlib histtype=stepfilled or density normalization for enhanced
     visualization
+  image_description: 'The plot displays three overlapping histograms comparing annual
+    salary distributions across three departments: Engineering (steel blue), Marketing
+    (golden yellow), and Sales (teal/cyan). The Engineering distribution is centered
+    around $95k with a tight spread, Marketing around $75k with moderate spread, and
+    Sales around $65k with the widest spread. The semi-transparent bars (alpha=0.5)
+    effectively show overlapping regions where distributions intersect. The x-axis
+    shows "Annual Salary ($)" formatted in thousands (20k-140k), and the y-axis shows
+    "Number of Employees" (0-35). A legend in the upper right clearly identifies each
+    department. The title follows the required format: "histogram-overlapping · matplotlib
+    · pyplots.ai". A subtle y-axis grid with dashed lines aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars well-sized, transparency allows all distributions
+          to be visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable, but blue and teal may look too similar
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Annual Salary ($)", "Number of Employees"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but only on y-axis; legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: overlapping histograms'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous salary on x-axis, count on y-axis, grouped by department
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills, distinct colors, legend, aligned bins
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range (20k-150k)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Engineering, Marketing, Sales
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "histogram-overlapping · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different central tendencies, different spreads, and overlapping
+          regions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary comparison across departments is a real, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary values ($20k-$150k) are realistic for US departments
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic ax.hist(); could leverage histtype='stepfilled' or additional
+          matplotlib-specific features
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/plotly.yaml b/plots/histogram-overlapping/metadata/plotly.yaml
index 57a01b495b..2ec72ddce7 100644
--- a/plots/histogram-overlapping/metadata/plotly.yaml
+++ b/plots/histogram-overlapping/metadata/plotly.yaml
@@ -19,4 +19,162 @@ review:
   - Proper font sizing scaled for 4800x2700 output
   - Includes both PNG and interactive HTML export (leveraging plotly strengths)
   - Clean, well-structured code following KISS principles
-  weaknesses: []
+  weaknesses:
+  - None significant - implementation is publication quality
+  image_description: 'The plot displays two overlapping histograms comparing height
+    distributions between Male (steel blue, #306998) and Female (gold/yellow, #FFD43B)
+    groups. The title "histogram-overlapping · plotly · pyplots.ai" is centered at
+    the top in large font. The x-axis shows "Height (cm)" ranging from ~145 to ~195
+    cm, and the y-axis shows "Frequency" from 0 to 40. Both histograms use 50% opacity
+    with overlay mode, revealing the intersection region around 160-175 cm where distributions
+    overlap. A legend with semi-transparent background is positioned in the top-right
+    corner. The grid is subtle with light gray lines on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, ticks at 18pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars well-sized with clear bin edges and outlines
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Height (cm)" includes units, "Frequency" is appropriate'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.1, legend well-placed with background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values on X-axis, groups distinguished by color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills (0.5 opacity), distinct colors, legend, aligned
+          bins (size=3)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Male" and "Female" labels correct'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "histogram-overlapping · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear overlap region, different means, different spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Height by gender is a classic real-world example with known distributions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Male mean ~175cm, Female mean ~162cm - realistic human height values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/plotnine.yaml b/plots/histogram-overlapping/metadata/plotnine.yaml
index 960f679fae..0cee491224 100644
--- a/plots/histogram-overlapping/metadata/plotnine.yaml
+++ b/plots/histogram-overlapping/metadata/plotnine.yaml
@@ -24,3 +24,178 @@ review:
   weaknesses:
   - 'Legend order is alphabetical rather than matching the logical data pattern (could
     order by mean response time: Power Users → Regular Users → New Users)'
+  image_description: "The plot displays three overlapping histograms showing response\
+    \ time distributions (in milliseconds) for three user groups. The x-axis shows\
+    \ \"Response Time (ms)\" ranging from approximately 0 to 800ms, and the y-axis\
+    \ shows \"Frequency\" ranging from 0 to about 33. Three distinct distributions\
+    \ are visible:\n- **Power Users** (yellow): Concentrated around 200ms with the\
+    \ tightest distribution\n- **Regular Users** (salmon/coral): Centered around 350ms\
+    \ with moderate spread  \n- **New Users** (blue): Centered around 450-500ms with\
+    \ the widest spread\n\nThe histograms use semi-transparent fills (alpha ~0.5)\
+    \ allowing overlap regions to be clearly visible. A legend labeled \"User Group\"\
+    \ is positioned on the right side. The title follows the correct format: \"histogram-overlapping\
+    \ · plotnine · pyplots.ai\". The plot uses a minimal theme with subtle grid lines."
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend all clearly readable at
+          appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap issues
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars well-sized with appropriate alpha for overlapping
+          visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral are distinguishable; yellow could be slightly
+          harder to see against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "Response Time (ms)", Y-axis labeled "Frequency"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle (good), but legend shows groups in alphabetical
+          order rather than logical order (Power→Regular→New would match the visual
+          left-to-right distribution pattern)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Response time on X, frequency on Y, grouped by user type
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills, distinct colors, legend present, aligned
+          bins
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-overlapping · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows three distinct distributions with different means and spreads,
+          clear overlap regions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times for different user experience levels is a realistic,
+          comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times in 100-800ms range are realistic for web application
+          metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Proper ggplot2 grammar with aes mapping, geom_histogram with position="identity",
+          scale_fill_manual, theme customization
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/pygal.yaml b/plots/histogram-overlapping/metadata/pygal.yaml
index 5df1be3805..97b1678e06 100644
--- a/plots/histogram-overlapping/metadata/pygal.yaml
+++ b/plots/histogram-overlapping/metadata/pygal.yaml
@@ -27,3 +27,180 @@ review:
     or adjusted legend positioning
   - Does not leverage pygal distinctive interactive features (tooltips, value formatters,
     custom hover effects) that differentiate it from static libraries
+  image_description: 'The plot displays two overlapping histograms comparing height
+    distributions by gender. The Male distribution (blue/steel color) is centered
+    around 175cm with a peak frequency of ~42, while the Female distribution (yellow/gold
+    color) is centered around 160cm with a peak frequency of ~40. Both distributions
+    use semi-transparent fills (approximately 50% opacity), allowing the overlapping
+    region (around 160-175cm) to show a darker olive/brown blend color. The title
+    "histogram-overlapping · pygal · pyplots.ai" appears at the top in a clean sans-serif
+    font. The X-axis is labeled "Height (cm)" ranging from 140-200, and the Y-axis
+    shows "Frequency" from 0-40+. A legend at the bottom identifies "Male" and "Female"
+    series. The plot has subtle horizontal grid lines and uses the pyplots color scheme
+    (blue #306998 and yellow #FFD43B).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size. Font sizes are well-scaled for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Legend is positioned at bottom, separate
+          from data.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Histogram bars are clearly visible. The overlapping region shows
+          both distributions well due to good transparency. Minor deduction: the overlap
+          blend color could be slightly more distinct.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe and provide excellent contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with appropriate margins. Good use of space.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Height (cm)" includes units, "Frequency" is descriptive.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend text "Female" overlaps with X-axis title "Height (cm)". The
+          legend placement at bottom causes a collision.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct histogram chart type using pygal.Histogram()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Height values correctly binned on X-axis, frequency on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fills, distinct colors, legend, aligned bin edges
+          - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range of data (140-200cm covers both distributions)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Male and Female groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overlapping distributions with clear separation of means, different
+          spreads visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Human height by gender is a classic, relatable example with realistic
+          parameters
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Male ~175cm (σ=7), Female ~162cm (σ=6) are realistic anthropometric
+          values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → histogram calculation → style → chart
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic pygal.Histogram() without leveraging pygal-specific features
+          like tooltips, custom formatters, or interactive configurations that would
+          enhance the SVG output.
+  verdict: APPROVED
diff --git a/plots/histogram-overlapping/metadata/seaborn.yaml b/plots/histogram-overlapping/metadata/seaborn.yaml
index 37b23df551..275a5ce3dd 100644
--- a/plots/histogram-overlapping/metadata/seaborn.yaml
+++ b/plots/histogram-overlapping/metadata/seaborn.yaml
@@ -25,3 +25,169 @@ review:
     aligned across all groups (spec recommends aligned bin edges)
   - Could use more idiomatic seaborn approach with DataFrame and hue parameter instead
     of three separate histplot calls
+  image_description: The plot displays three overlapping histograms showing employee
+    response times (ms) by department. Engineering (blue) is centered around 450ms
+    with a tight distribution, Marketing (yellow) has a wider spread centered around
+    520ms extending to ~900ms, and Sales (pink/coral) shows a consistent mid-range
+    distribution around 480ms. The transparency (alpha=0.5) effectively reveals overlapping
+    regions where distributions intersect. The title "histogram-overlapping · seaborn
+    · pyplots.ai" appears at the top, with a clear legend in the upper right corner.
+    Axis labels include units, and a subtle dashed y-axis grid aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 39
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap issues
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars clearly visible with appropriate alpha for overlapping
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/coral distinguishable; yellow/coral slightly close for
+          some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Response Time (ms)", "Count"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid (alpha=0.3), legend well-placed upper right
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct overlapping histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows continuous values, Y-axis shows frequency count
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has transparency, legend, distinct colors; bins independently calculated
+          per group rather than explicitly aligned
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Engineering, Marketing, Sales
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct: "histogram-overlapping · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different central tendencies (450/520/480ms), different spreads
+          (60-100 std), clear overlap regions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee response times by department is a plausible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 300-700ms response times are realistic for employee metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.histplot correctly with good styling, but could leverage
+          DataFrame + hue parameter for more idiomatic seaborn
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/altair.yaml b/plots/histogram-stacked/metadata/altair.yaml
index 1cfa22121f..23b446c1f6 100644
--- a/plots/histogram-stacked/metadata/altair.yaml
+++ b/plots/histogram-stacked/metadata/altair.yaml
@@ -28,3 +28,177 @@ review:
   - Could add tooltips for interactive exploration showing exact counts per bin
   - The order parameter could be more intentionally designed to show a specific visual
     story (e.g., ordering by mean score)
+  image_description: 'The plot displays a stacked histogram showing test score distributions
+    for three study methods. The X-axis shows "Test Score (points)" ranging from 40
+    to 100, and the Y-axis shows "Number of Students" ranging from 0 to 80. Three
+    groups are stacked: Traditional Study (blue, #306998), Active Recall (yellow,
+    #FFD43B), and Passive Reading (orange, #E67E22). The stacking clearly shows individual
+    group contributions with total bar heights representing combined frequencies.
+    The title "histogram-stacked · altair · pyplots.ai" appears at the top. A legend
+    on the right identifies each study method. The grid uses subtle dashed lines with
+    low opacity.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with good opacity (0.85), white stroke separates
+          segments nicely
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, orange palette is colorblind-friendly (no red-green
+          issues), though could be slightly more distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills ~60% of canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Test Score (points)", "Number of Students"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but legend title font could better
+          match overall styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable (Score) binned on X, count on Y, category for
+          color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stacking, distinct colors, legend with
+          group labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate bin boundaries (40-100)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data groups correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stacked · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct distributions with different means and spreads,
+          demonstrates stacking well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Educational test scores with study methods is a neutral, relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Test scores 0-100 are realistic; distributions centered around 68-78
+          are plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of declarative encoding with alt.X, alt.Y, alt.Color, binning,
+          and stack configuration, but could leverage more Altair-specific features
+          like tooltips or selection
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/bokeh.yaml b/plots/histogram-stacked/metadata/bokeh.yaml
index c49bb872f7..2b1a1b1a48 100644
--- a/plots/histogram-stacked/metadata/bokeh.yaml
+++ b/plots/histogram-stacked/metadata/bokeh.yaml
@@ -23,3 +23,175 @@ review:
     users; consider using more distinct hues'
   - Does not use ColumnDataSource which is idiomatic Bokeh practice
   - Some test scores exceed 100 points which is atypical for standard tests
+  image_description: 'The plot displays a stacked histogram showing test score distributions
+    for three study groups. The x-axis shows "Test Score (points)" ranging from approximately
+    40 to 120, and the y-axis shows "Frequency (count)" from 0 to about 83. Three
+    colored segments are stacked: dark blue (Self-study Group) at bottom, yellow (Regular
+    Study Group) in middle, and light blue (Intensive Study Group) on top. The distributions
+    show realistic patterns with Self-study peaking around 65, Regular around 75,
+    and Intensive around 82. A legend is positioned in the top-right corner. White
+    line separators between bar segments provide clear visual distinction. The title
+    "histogram-stacked · bokeh · pyplots.ai" appears at top-left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with 0.85 width factor, alpha 0.9 provides good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast between groups, but two blues (dark and light) could
+          be confused by some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, though legend is slightly far from data
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Test Score (points)", "Frequency
+          (count)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend colors don't match plot colors (Self-study shows as darker
+          blue in legend but appears lighter in some contexts)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous values binned correctly, categories stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Same bin boundaries, distinct colors, legend present, total height
+          = combined frequency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match group names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: histogram-stacked · bokeh · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct distributions with different means and spreads,
+          though more variation in spreads would better demonstrate the plot type
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores from different study methods is a plausible, neutral
+          educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores in 40-120 range are reasonable for tests, though some values
+          exceed 100 which is unusual for typical tests
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Basic vbar usage, could leverage ColumnDataSource and HoverTool for
+          better Bokeh idioms
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/highcharts.yaml b/plots/histogram-stacked/metadata/highcharts.yaml
index 8d5dcf26dc..e23ebbdc8e 100644
--- a/plots/histogram-stacked/metadata/highcharts.yaml
+++ b/plots/histogram-stacked/metadata/highcharts.yaml
@@ -26,3 +26,177 @@ review:
     mismatch)
   - Axis labels lack units (e.g., could be "Measurement Range (units)" and "Frequency
     (count)")
+  image_description: The plot displays a stacked column chart representing measurement
+    distributions across three sensor types. The visualization uses 15 bins spanning
+    from 14-97 on the x-axis ("Measurement Range"). Blue bars represent Sensor A (centered
+    around lower values ~45-55), yellow bars represent Sensor B (centered ~55-65),
+    and purple bars represent Sensor C (centered ~65-75). The stacked bars clearly
+    show combined frequencies reaching up to ~57 at the peak (52-58 bin). The title
+    "histogram-stacked · highcharts · pyplots.ai" appears prominently at the top with
+    a subtitle "Measurement Distribution by Sensor Type". A vertical legend is positioned
+    in the top-right corner. The layout is clean with good use of white space and
+    subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 72px, axis labels at 48px, tick labels at 28-36px - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, bin labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with appropriate width and white borders
+          separating stacked segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (blue #306998, yellow #FFD43B, purple
+          #9467BD) - no red-green conflicts'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good proportions, plot fills canvas well, minor: extra bottom margin
+          creates slight imbalance'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Measurement Range", "Frequency") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend well placed but could be slightly
+          larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram using column chart with stacking
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous values binned correctly, categories stacked properly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: same bin boundaries for all groups, distinct
+          colors, legend with group labels, total bar height shows combined frequency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis scales appropriately to max frequency
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Sensor A, B, C
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stacked · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three overlapping distributions with different centers, demonstrates
+          stacking well; could show more variation in spread
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sensor measurement data is a plausible, neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 14-97 are reasonable for generic measurements; sample sizes
+          (150, 120, 100) are appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but image dimensions are 4800x2561 instead of 4800x2700
+          (marginBottom may cause render height mismatch)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Highcharts stacking options, ColumnSeries, and chart
+          configuration; could leverage tooltips or data labels for interactivity
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/letsplot.yaml b/plots/histogram-stacked/metadata/letsplot.yaml
index aa0e9a18dc..e0cb4ab116 100644
--- a/plots/histogram-stacked/metadata/letsplot.yaml
+++ b/plots/histogram-stacked/metadata/letsplot.yaml
@@ -24,3 +24,176 @@ review:
   - Could use more distinctive lets-plot features (e.g., tooltips, flavor themes)
   - Color palette includes red and yellow which may be suboptimal for red-green colorblind
     users
+  image_description: 'The plot shows a stacked histogram displaying test scores from
+    three different classes (A, B, C). The x-axis shows "Test Score (points)" ranging
+    from ~40 to 105, and the y-axis shows "Number of Students" ranging from 0 to 75.
+    Three distinct colors are used: blue (Class A), yellow (Class B), and red (Class
+    C). The bars are stacked, with Class A at the bottom, Class B in the middle, and
+    Class C on top. The distribution is roughly bell-shaped, peaking around the 70-75
+    score range. White borders separate the stacked segments within each bin. A legend
+    on the right identifies the three classes. The title follows the required format:
+    "histogram-stacked · lets-plot · pyplots.ai". The overall layout uses a minimal
+    theme with subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars are well-sized with good visibility, white borders
+          help distinguish segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/red are distinguishable; yellow and red could be challenging
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot is well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Test Score (points)"
+          and "Number of Students"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle but legend is positioned far to the right with excessive
+          whitespace
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable on X, categories properly stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stacking, distinct colors, legend, same
+          bin boundaries'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all three classes
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "histogram-stacked · lets-plot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple groups with different distributions, though distributions
+          could show more variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores from three classes is a realistic, neutral educational
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores in 0-100 range are realistic; clipping applied appropriately
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly but doesn't leverage lets-plot specific
+          features like tooltips or interactive capabilities in a distinctive way
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/matplotlib.yaml b/plots/histogram-stacked/metadata/matplotlib.yaml
index 9c595eaa1d..7b7549ad12 100644
--- a/plots/histogram-stacked/metadata/matplotlib.yaml
+++ b/plots/histogram-stacked/metadata/matplotlib.yaml
@@ -24,3 +24,176 @@ review:
   - No distinctive matplotlib features beyond basic histogram; could use annotations,
     custom spine styling, or statistical overlays
   - Legend visual separation could be enhanced with a border or better background
+  image_description: 'The plot displays a stacked histogram showing server response
+    times (in milliseconds) across three geographic regions. The x-axis shows "Response
+    Time (ms)" ranging from approximately 10 to 140ms. The y-axis shows "Number of
+    Requests" ranging from 0 to about 75. Three colored segments are stacked: US-East
+    (blue/#306998), Europe (yellow/#FFD43B), and Asia-Pacific (green/#5BA85B). The
+    US-East distribution peaks around 45ms, Europe around 65ms, and Asia-Pacific extends
+    to higher values around 80-100ms. The title reads "histogram-stacked · matplotlib
+    · pyplots.ai" and a legend is positioned in the upper right corner. White edge
+    lines separate the bars, and a subtle y-axis grid is visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars are well-sized with good alpha (0.9) and white edge
+          lines for distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green are distinguishable and avoid red-green issues,
+          though the adjacent yellow and green segments could be slightly more distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)" and "Number
+          of Requests"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid only on y-axis is good, but legend could use slightly better
+          visual separation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable (response time) binned, category (region) for
+          stacking
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Same bin boundaries for all groups, distinct colors, legend with
+          labels, total height shows combined frequency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all three regions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stacked · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overlapping distributions, different group sizes (200, 180,
+          150), varying means and spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response times by region is a realistic, neutral technical
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times 10-160ms are realistic for web services
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic ax.hist() with stacked=True is standard usage, no distinctive
+          matplotlib features like custom styling, annotations, or advanced formatting
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/plotly.yaml b/plots/histogram-stacked/metadata/plotly.yaml
index 964a355b4d..db4e46ca43 100644
--- a/plots/histogram-stacked/metadata/plotly.yaml
+++ b/plots/histogram-stacked/metadata/plotly.yaml
@@ -23,3 +23,177 @@ review:
   - Uses manual histogram calculation with go.Bar instead of more idiomatic Plotly
     histogram traces
   - Does not leverage Plotly interactive features like custom hover templates
+  image_description: 'The plot displays a stacked histogram showing product weight
+    distributions from three production lines. The x-axis shows "Product Weight (g)"
+    ranging from approximately 170g to 370g, and the y-axis shows "Frequency" from
+    0 to 70. Three distinct colors are used: dark blue (#306998) for Line A at the
+    bottom of each stack, yellow (#FFD43B) for Line B in the middle, and teal (#4ECDC4)
+    for Line C on top. The title "histogram-stacked · plotly · pyplots.ai" is centered
+    at the top. A legend in the upper right corner identifies each production line
+    with a subtle white background and border. The distributions overlap meaningfully,
+    with Line A centered around 250g, Line B around 280g, and Line C around 260g.
+    Bars show proper stacking where total height represents combined frequency.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths are well-proportioned, colors clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast, blue/yellow/teal palette is colorblind-friendly, though
+          yellow against the white background could have slightly stronger contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Weight (g)" and "Frequency" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well-placed but could have slightly more
+          contrast
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable (weight) properly binned, categories properly
+          stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Same bin boundaries for all groups, distinct colors, legend with
+          group labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Line A, B, C
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "histogram-stacked · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overlapping distributions with different centers and spreads,
+          demonstrates stacking well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product weights from production lines is a realistic manufacturing
+          QC scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for product weights, though 150 samples per
+          group is modest
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Bar with barmode="stack" which is correct but basic; could
+          leverage plotly.express histogram with color parameter for more idiomatic
+          approach, or add hover templates for interactivity
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/plotnine.yaml b/plots/histogram-stacked/metadata/plotnine.yaml
index 9418882a21..55bc066655 100644
--- a/plots/histogram-stacked/metadata/plotnine.yaml
+++ b/plots/histogram-stacked/metadata/plotnine.yaml
@@ -22,3 +22,161 @@ review:
   weaknesses:
   - Could use scale_fill_brewer for more colorblind-safe colors instead of manual
     hex values
+  image_description: 'The plot displays a stacked histogram showing plant height distributions
+    across three soil types. The x-axis shows "Plant Height (cm)" ranging from 10
+    to 50, and the y-axis shows "Frequency" from 0 to about 68. Three distinct colors
+    represent the soil types: dark blue (#306998) for Clay Soil, yellow (#FFD43B)
+    for Loamy Soil, and teal (#4ECDC4) for Sandy Soil. The bars are stacked on top
+    of each other, with white borders separating segments (alpha=0.85). The distributions
+    show different peaks: Sandy Soil peaks around 25cm, Loamy Soil around 30cm, and
+    Clay Soil around 22cm. The legend is positioned on the right side. The title "histogram-stacked
+    · plotnine · pyplots.ai" is prominently displayed at the top. The plot uses a
+    minimal theme with subtle gray gridlines on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text perfectly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good alpha and white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors distinguishable but could be more colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Plant Height (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle, legend placement acceptable
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y and fill correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All spec features present (stacking, colors, legend)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correct and descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: histogram-stacked · plotnine · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows three distinct distributions with different means and spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Excellent plant growth scenario (science/agriculture)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic plant heights (10-50 cm)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed seed (42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/pygal.yaml b/plots/histogram-stacked/metadata/pygal.yaml
index b5a1b7c931..791a8816b7 100644
--- a/plots/histogram-stacked/metadata/pygal.yaml
+++ b/plots/histogram-stacked/metadata/pygal.yaml
@@ -22,3 +22,178 @@ review:
   weaknesses:
   - Legend position in top-left corner could be moved to avoid proximity to plot area
   - Could leverage more pygal-specific features like custom tooltips or value labels
+  image_description: 'The plot displays a stacked bar chart simulating a stacked histogram
+    of plant height measurements in centimeters. Three groups are shown: Shade-grown
+    (blue, #306998), Partial-sun (yellow, #FFD43B), and Full-sun (green, #4CAF50).
+    The x-axis displays 12 bin ranges from "5-10" to "60-65" cm with the label "Plant
+    Height (cm)". The y-axis shows "Frequency" ranging from 0 to approximately 80.
+    The title correctly reads "histogram-stacked · pygal · pyplots.ai" at the top
+    center. The legend is positioned in the top-left corner showing all three group
+    labels. The bars are properly stacked, with the blue (Shade-grown) distribution
+    centered around 20-30 cm, yellow (Partial-sun) around 30-40 cm, and green (Full-sun)
+    around 40-50 cm. The overall layout is clean with a white background and subtle
+    horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all readable. Font sizes
+          are appropriate for the 4800x2700 canvas. Legend text is slightly small
+          but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. X-axis labels are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible. Stacking is clearly distinguishable.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green provide good contrast. Not a red-green combination,
+          but blue-yellow-green could be slightly improved for deuteranopia.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins. Legend is appropriately
+          positioned.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Plant Height (cm)" with units, Y-axis has "Frequency".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid lines are subtle (good), but legend placement in top-left corner
+          overlaps with the plot area slightly and could be better positioned.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements stacked histogram using pygal's StackedBar chart.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows bin ranges, Y shows frequency, stacking shows group contributions.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stacked bars, distinct colors, legend
+          with group labels, same bin boundaries for all groups.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the chart bounds.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly show "Shade-grown", "Partial-sun", "Full-sun".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "histogram-stacked · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct distributions with different means and spreads.
+          Could benefit from more overlap between groups to better demonstrate stacking.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth measurements under different sunlight conditions is
+          a realistic, neutral scientific scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Plant heights of 5-65 cm are reasonable, though the distributions
+          could have more natural variation.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pygal, Style.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses pygal's StackedBar and custom Style, but doesn't leverage more
+          distinctive pygal features like tooltips configuration or value formatters.
+  verdict: APPROVED
diff --git a/plots/histogram-stacked/metadata/seaborn.yaml b/plots/histogram-stacked/metadata/seaborn.yaml
index 05ced411b0..d8ddfc1cef 100644
--- a/plots/histogram-stacked/metadata/seaborn.yaml
+++ b/plots/histogram-stacked/metadata/seaborn.yaml
@@ -25,3 +25,176 @@ review:
     with tall bars
   - Color palette could use a more colorblind-optimized palette like colorblind or
     Set2
+  image_description: 'The plot displays a stacked histogram showing server response
+    times (in milliseconds) across three geographic regions. The x-axis shows "Response
+    Time (ms)" ranging from approximately 10 to 120ms, while the y-axis shows "Frequency"
+    from 0 to about 55. Three distinct colors are used: blue (#306998) for US East,
+    yellow/gold (#FFD43B) for Europe, and green (#4DAF4A) for Asia Pacific. The bars
+    are stacked vertically, with US East at the bottom, Europe in the middle, and
+    Asia Pacific on top. The title reads "histogram-stacked · seaborn · pyplots.ai"
+    and a legend labeled "Server Region" is positioned in the upper right corner.
+    The distribution shows US East having faster response times (centered around 45ms),
+    Europe in the middle (around 60ms), and Asia Pacific with slower times (around
+    75ms). Subtle gray dashed gridlines appear on the y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Histogram bars well-sized, white edges provide good separation between
+          stacked segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is distinguishable, though blue and green
+          may appear somewhat similar to some colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" includes units, "Frequency" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but the legend could be better
+          positioned (slightly overlaps potential data area)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stacked histogram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous variable on x-axis, categories for stacking
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Same bin boundaries, distinct colors, legend with group labels, stacking
+          shows combined frequency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes encompass full distribution
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data groups correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: histogram-stacked · seaborn · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three overlapping distributions with different centers and
+          spreads, demonstrates stacking well. Could show more dramatic differences
+          between groups.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response times by region is a realistic, neutral tech scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times of 15-120ms are realistic for server latency
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of sns.histplot with multiple="stack" parameter, hue_order
+          for controlling stack order, and seaborn's elegant palette handling
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/altair.yaml b/plots/histogram-stepwise/metadata/altair.yaml
index eb182fa256..9c435963b9 100644
--- a/plots/histogram-stepwise/metadata/altair.yaml
+++ b/plots/histogram-stepwise/metadata/altair.yaml
@@ -24,3 +24,170 @@ review:
   - Single distribution only - spec mentions overlaying multiple distributions as
     an application
   - Could leverage Altair interactive features like tooltips for the HTML output
+  image_description: The plot displays a step histogram (outline only, no fill) with
+    a blue line color (#306998). The x-axis is labeled "Measurement Value" ranging
+    from approximately 6 to 70, and the y-axis is labeled "Frequency" ranging from
+    0 to 70. The title reads "histogram-stepwise · altair · pyplots.ai" at the top
+    center. The data shows a bimodal distribution with one peak around 25-27 (frequency
+    ~67) and a smaller second peak around 44-46 (frequency ~31). The step-line visualization
+    creates clear horizontal segments at each bin's count level connected by vertical
+    segments, exactly as specified. The layout is clean with subtle grid lines, no
+    fill in the histogram, and excellent use of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line stroke width of 3 is excellent for visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at 0.3 alpha is good, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram with outline only
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = measurement values, Y = frequency counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines, no fill, horizontal segments at count levels, vertical
+          connectors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate axis scaling
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series step histogram
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: histogram-stepwise · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Bimodal distribution shows step histogram well, but only one distribution
+          (spec mentions overlaying)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Neutral "Measurement Value" context is appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable, 500 data points is good
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used and all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding and mark_line with interpolate,
+          but could leverage more distinctive features like tooltips or interactivity
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/bokeh.yaml b/plots/histogram-stepwise/metadata/bokeh.yaml
index 4e8c832fb7..3e01728588 100644
--- a/plots/histogram-stepwise/metadata/bokeh.yaml
+++ b/plots/histogram-stepwise/metadata/bokeh.yaml
@@ -24,3 +24,179 @@ review:
   - Legend appears small relative to other text elements despite having label_text_font_size
     set
   - Could demonstrate more variety in the distributions (different spreads or shapes)
+  image_description: The plot shows two step histograms (outline only, no fill) comparing
+    temperature distributions from two sensors. The blue line represents "Sensor A"
+    with a distribution centered around 50°C, and the yellow/gold line represents
+    "Sensor B" centered around 65°C. The x-axis shows "Temperature (°C)" ranging from
+    approximately 15 to 100, and the y-axis shows "Frequency" from 0 to about 56.
+    The title reads "histogram-stepwise · bokeh · pyplots.ai" in the top-left. A legend
+    in the top-right corner identifies the two sensors. The background is white with
+    subtle dashed grid lines. Both distributions show the characteristic step function
+    appearance with horizontal segments at count levels connected by vertical segments.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step lines are visible with good line width (5px), though the two
+          distributions do overlap in the middle region which is acceptable for comparison
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though there is some unused space on the
+          right side of the plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Temperature (°C)" and "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is positioned in top-right corner but appears quite small
+          relative to the canvas; the legend text is noticeably smaller than other
+          text elements in the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram with outline only, no fill
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows continuous values, Y-axis shows frequency counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines with horizontal segments at count levels and vertical
+          connectors between bins
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both distributions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stepwise · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two overlapping distributions for comparison as suggested in
+          spec, demonstrates the advantage of step histograms for comparing distributions;
+          minor deduction as distributions could show more variety (e.g., different
+          spreads)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor readings is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values are reasonable for sensor readings, though 65°C
+          average for a sensor might be high for typical ambient conditions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also saves plot.html which is fine for bokeh
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses basic Bokeh line plotting and export features; could leverage
+          more Bokeh-specific features like hover tools or better interactivity in
+          the HTML output
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/letsplot.yaml b/plots/histogram-stepwise/metadata/letsplot.yaml
index 7d02002a56..42aab0be4d 100644
--- a/plots/histogram-stepwise/metadata/letsplot.yaml
+++ b/plots/histogram-stepwise/metadata/letsplot.yaml
@@ -24,3 +24,180 @@ review:
   weaknesses:
   - Could leverage lets-plot interactive tooltip features for the HTML output
   - Legend title is slightly generic compared to a more descriptive label
+  image_description: The plot displays two step histogram distributions comparing
+    morning and afternoon temperature readings. The morning distribution (dark blue/navy
+    line) is centered around 18°C with a peak frequency near 58, while the afternoon
+    distribution (golden yellow line) is centered around 26°C with a peak frequency
+    near 52. Both distributions are rendered as connected step lines (outline only,
+    no fill) creating a classic step function appearance. The x-axis shows "Temperature
+    (°C)" ranging from 4 to 40, and the y-axis shows "Frequency" ranging from 0 to
+    60. A legend on the right identifies "Morning" and "Afternoon" under "Time Period".
+    The title reads "histogram-stepwise · letsplot · pyplots.ai". The background uses
+    a minimal theme with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend text are all clearly readable
+          at the output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step lines are well-sized (size=2.5) and clearly visible; minor deduction
+          as line width could be slightly thicker for even better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe (not red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills appropriate area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but legend title shows "Time Period"
+          which is slightly generic; legend placement is good
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram with outline-only lines (no fill)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous temperature values correctly binned and displayed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines, no fill, multiple overlaid distributions for comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show complete data range from ~4°C to ~40°C
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Morning and Afternoon distributions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "histogram-stepwise · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two overlaid distributions demonstrating the key use case;
+          distributions overlap in the middle showing comparison benefit; minor deduction
+          for not showing more extreme distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature readings comparing morning vs afternoon is a real, neutral,
+          comprehensible scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (morning ~18°C, afternoon ~26°C);
+          minor deduction as the spread could show more realistic weather variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html correctly, but uses path="." parameter
+          style
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_line, scale_color_manual,
+          theme_minimal, and custom theme elements; however, doesn't leverage lets-plot's
+          interactive tooltips or other distinctive features beyond basic ggplot grammar
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/matplotlib.yaml b/plots/histogram-stepwise/metadata/matplotlib.yaml
index 1a08e3f731..7e5a252697 100644
--- a/plots/histogram-stepwise/metadata/matplotlib.yaml
+++ b/plots/histogram-stepwise/metadata/matplotlib.yaml
@@ -25,3 +25,174 @@ review:
     upper left or outside plot
   - Both distributions have similar shapes (normal); varying the spread or adding
     a third distribution would better showcase the comparison capability
+  image_description: The plot displays two step histograms comparing temperature distributions.
+    The blue line ("Morning") shows a distribution centered around 15°C with a peak
+    frequency of ~29, while the yellow/gold line ("Afternoon") shows a distribution
+    centered around 22-24°C with a peak frequency of ~24. Both distributions are rendered
+    as step outlines only (no fill), creating clean overlapping comparisons. The title
+    reads "histogram-stepwise · matplotlib · pyplots.ai" at the top. X-axis shows
+    "Temperature (°C)" ranging from ~5 to ~37, Y-axis shows "Frequency" ranging from
+    0 to 30. A legend in the upper right identifies the two series. A subtle dashed
+    grid aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend positioned away from data
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step lines at linewidth=3 are clearly visible and well-suited for
+          the data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of 16:9 aspect
+          ratio
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Frequency" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend could be better positioned
+          to avoid overlap with data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram using histtype="step"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values correctly binned and counted
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines (outline only), no fill, multiple distributions overlaid
+          for comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Morning and Afternoon
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "histogram-stepwise · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows two overlapping distributions well, but distributions could
+          show more distinct characteristics (e.g., different spreads or shapes)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Morning vs afternoon temperature comparison is a real, neutral, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (5-37°C) are realistic for daily temperature variations
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's histtype="step" correctly, but could leverage additional
+          features like different linestyles for better distinction
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/plotnine.yaml b/plots/histogram-stepwise/metadata/plotnine.yaml
index 805629f531..701df2a383 100644
--- a/plots/histogram-stepwise/metadata/plotnine.yaml
+++ b/plots/histogram-stepwise/metadata/plotnine.yaml
@@ -25,3 +25,181 @@ review:
   - Legend placement inside the plot area could potentially overlap with data in some
     cases; consider placing outside
   - Manual histogram computation instead of exploring plotnine native histogram capabilities
+  image_description: The plot displays two overlapping step histograms showing response
+    time distributions for server configurations. The blue line represents "Optimized
+    Setup" (centered around 180ms with a tighter distribution) and the yellow/gold
+    line represents "Standard Setup" (centered around 250ms with a wider spread).
+    The plot uses a clean white background with subtle gray grid lines. The title
+    "histogram-stepwise · plotnine · pyplots.ai" is prominently displayed at the top.
+    X-axis shows "Response Time (ms)" ranging from about 50 to 500, Y-axis shows "Frequency"
+    ranging from 0 to 60. A legend in the right portion of the plot area identifies
+    the two configurations. The step lines are thick and clearly visible with good
+    contrast.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title at 24pt, axis labels at 20pt, tick
+          labels at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Step lines are thick (size=2) and visible; alpha=0.9 provides good
+          contrast. Minor deduction: the distributions overlap, though both remain distinguishable'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) have good contrast and are distinguishable
+          for most color vision deficiencies. Not a perfect colorblind-safe palette
+          but acceptable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Response Time (ms)"
+          and "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle. However, the legend inside the plot
+          area, while acceptable, could be positioned better to avoid potential
+          overlap with data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram (outline only, no fill)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows continuous variable values, Y-axis shows frequency counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows step lines without fill, horizontal segments at count levels,
+          vertical segments connecting bins, demonstrates overlaying multiple distributions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both configurations
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stepwise · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two overlapping distributions with different means and spreads,
+          demonstrating the main use case. Could show more distinct shapes (e.g.,
+          skewed distribution)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time comparison is a realistic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times of 180ms and 250ms are realistic for server responses.
+          Range is sensible.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_step, theme_minimal, and proper theming.
+          However, manually computing histogram bins rather than using geom_histogram
+          with stat approach is a workaround rather than leveraging plotnine's native
+          capabilities
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/pygal.yaml b/plots/histogram-stepwise/metadata/pygal.yaml
index 7e30872074..589f17116e 100644
--- a/plots/histogram-stepwise/metadata/pygal.yaml
+++ b/plots/histogram-stepwise/metadata/pygal.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Axis labels lack units (e.g., Value (units) or Frequency (count))
   - Could use more realistic/contextual data scenario instead of abstract values
+  image_description: 'The plot displays a step histogram showing a bimodal distribution
+    on a white background. A blue step line (Python blue #306998) outlines the histogram
+    without any fill, creating a clean step-function appearance. The title "histogram-stepwise
+    · pygal · pyplots.ai" appears at the top in dark gray text. The X-axis is labeled
+    "Value" (ranging approximately 20-110) and the Y-axis is labeled "Frequency" (ranging
+    0-38). The bimodal distribution shows two distinct peaks: one centered around
+    value 50 with frequency ~38, and another around value 85 with frequency ~27. Horizontal
+    grid lines are visible at regular intervals. The step lines correctly connect
+    horizontal segments (bin counts) with vertical segments between adjacent bins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          4800x2700 resolution with appropriately sized fonts
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step lines are clearly visible with appropriate stroke width (4px)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color on white background, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some wasted space at the top; plot could be
+          slightly larger
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Value", "Frequency") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Horizontal grid is subtle and helpful; no legend needed for single
+          series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram with outline only, no fill
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows value bins, Y-axis shows frequency counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines present, no fill, horizontal segments at count levels,
+          vertical connections between bins
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete distribution
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, correctly omitted with show_legend=False
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stepwise · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution excellently demonstrates the step histogram's
+          ability to show complex distributions
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Generic numeric data; could represent measurement distributions but
+          context is abstract
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in 20-110 range with frequencies up to ~38 are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → histogram computation → chart
+          creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducible results
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves both plot.png and plot.html, but the expected output is plot.png
+          (minor: both files are saved)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of pygal.XY with show_dots=False and fill=False to create
+          step effect; custom Style for sizing; however, pygal has a native Histogram
+          chart type that wasn't used
+  verdict: APPROVED
diff --git a/plots/histogram-stepwise/metadata/seaborn.yaml b/plots/histogram-stepwise/metadata/seaborn.yaml
index 6c014d59c4..73b7c0f5a8 100644
--- a/plots/histogram-stepwise/metadata/seaborn.yaml
+++ b/plots/histogram-stepwise/metadata/seaborn.yaml
@@ -24,3 +24,165 @@ review:
   - Data scenario is functional but generic - could use more specific service names
     or context
   - Grid alpha at 0.3 is acceptable but could be slightly more subtle at 0.2
+  image_description: The plot displays two step histograms showing response time distributions
+    for two services. Service A is shown in blue (#306998) and Service B in yellow/gold
+    (#FFD43B). Both histograms use step lines (outline only, no fill) as required
+    by the spec. The x-axis shows "Response Time (ms)" ranging from 0 to ~700ms, and
+    the y-axis shows "Count" ranging from 0 to ~130. Service A (blue) has a tighter
+    distribution peaking around 20-40ms with counts up to ~92, while Service B (yellow)
+    has a wider distribution peaking around 40-60ms with counts up to ~130. Both show
+    right-skewed (exponential) distributions. The title follows the correct format
+    "histogram-stepwise · seaborn · pyplots.ai". A legend is placed in the upper right
+    corner. A subtle dashed grid is visible in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step lines with linewidth=3 are clearly visible, two distributions
+          well distinguished
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Response Time (ms)" and "Count"'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step histogram with outline only, no fill
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous values correctly binned and counted
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines, no fill, multiple distributions for comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels "Service A" and "Service B" are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "histogram-stepwise · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows overlapping distributions with different shapes, demonstrates
+          comparison use case
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Response times for services are plausible but somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times 20-700ms are realistic for web services
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses seaborn's histplot with element='step' and fill=False - good
+          use of seaborn's histogram API, though could use additional seaborn styling
+          features
+  verdict: APPROVED
diff --git a/plots/hive-basic/metadata/highcharts.yaml b/plots/hive-basic/metadata/highcharts.yaml
index 6fc760632e..0573c6040a 100644
--- a/plots/hive-basic/metadata/highcharts.yaml
+++ b/plots/hive-basic/metadata/highcharts.yaml
@@ -25,3 +25,178 @@ review:
   - Does not use highcharts-core Python library as recommended in library rules (uses
     raw JS generation instead)
   - Code could benefit from an explicit comment noting deterministic data for reproducibility
+  image_description: 'The plot displays a hive plot visualization of a software module
+    dependency network. Three radial axes extend from a central gray hub: "Core" (blue,
+    pointing upward), "Utility" (yellow, pointing to lower-right), and "Interface"
+    (purple, pointing to lower-left). Each axis has 4 labeled nodes (Core: Main, Engine,
+    Database, Auth; Utility: Logger, Config, Cache, Format; Interface: API, Web, CLI,
+    WebSocket). Curved edges connect nodes between axes, with edge thickness representing
+    dependency weight. The plot uses a colorblind-safe palette (blue #306998, yellow
+    #FFD43B, purple #9467BD). A legend in the upper-right shows Module Types with
+    colored circles and Edge Weight with line thickness examples. The title "hive-basic
+    · highcharts · pyplots.ai" appears at the top. White background with good contrast.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, axis labels at 48px bold, node labels at 36px - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; node labels positioned intelligently based on
+          axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes 32px radius with 5px white stroke visible; edges use appropriate
+          thickness (5-9px) with 0.5 opacity - slightly dense at hub
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue/yellow/purple), excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No traditional X/Y axes (hive plot has radial axes with category
+          labels, which is appropriate for this plot type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-designed with Module Types and Edge Weight guide;
+          no intrusive grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct hive plot with 3 radial axes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly assigned to axes by module type, positioned by degree/importance
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features: 3 axes, node positioning, curved edges, edge
+          weights, central hub'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes visible along axes, edges connect correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately describes Module Types and Edge Weight
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "hive-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: 3 module categories, varying edge weights (1-3),
+          different connection patterns between axes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Software module dependency network is plausible and relatable; module
+          names are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 12 nodes across 3 axes, 16 edges with sensible weights (1-3)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → coordinate calculation → Highcharts
+          rendering → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Deterministic data (no random seed needed), but no explicit comment
+          noting this for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts renderer API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts SVGRenderer API for custom drawing, interactive tooltips
+          on hover, but doesn't use highcharts-core Python library
+  verdict: APPROVED
diff --git a/plots/hive-basic/metadata/letsplot.yaml b/plots/hive-basic/metadata/letsplot.yaml
index 30f0c4483c..92bbccdd34 100644
--- a/plots/hive-basic/metadata/letsplot.yaml
+++ b/plots/hive-basic/metadata/letsplot.yaml
@@ -25,3 +25,184 @@ review:
   - Title format includes extra descriptive text before the required format
   - Edge curves could be slightly less transparent in the center region where they
     overlap
+  image_description: 'The plot displays a hive plot visualization for software module
+    dependencies. Three radial axes extend from near the center: **Core** (pointing
+    right/east), **Utility** (pointing upper-left), and **Interface** (pointing lower-left).
+    Each axis has nodes (circles) positioned along it with sizes representing degree
+    (number of connections). Core modules are shown in blue (#306998), Utility modules
+    in yellow (#FFD43B), and Interface modules in green (#34A853). Curved Bezier edges
+    connect the nodes, colored by type: blue for within-Core, yellow for within-Utility,
+    green for within-Interface, and gray for between-axes connections. The title at
+    the top reads "Software Module Dependencies · hive-basic · letsplot · pyplots.ai".
+    A comprehensive legend on the right shows Edge Type, Connections (size scale 2-8),
+    and Module Type. The background is white with axis labels in bold black text near
+    each axis terminus.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title is large and readable (24pt), axis labels are bold and clear.
+          Legend text is appropriately sized. Minor: tick marks not present but appropriate
+          for this plot type.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, axis labels well separated from nodes
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are clearly visible with good sizing scaled by degree. Edges
+          have appropriate alpha (0.6) for visibility without overwhelming. Slightly
+          dense in the center.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green provide good contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot is well-centered, uses canvas effectively, legend positioned
+          appropriately on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No numeric axis labels or units (N/A for hive plots - radial position
+          represents degree ordering)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is comprehensive with three sections (Edge Type, Connections,
+          Module Type), axis lines are subtle gray
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct hive plot with radial axes, nodes positioned by attribute,
+          curved edges
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly assigned to axes by module type, positioned by degree
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 axes, nodes with degree, edges with
+          bundling-like curves, realistic software dependency scenario'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes visible, radial range from 0.3 to 0.95 works well
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately describes edge types, node sizes, and module types
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes spec-id, library, pyplots.ai but adds descriptive
+          prefix "Software Module Dependencies" which is acceptable but not the exact
+          format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows within-axis and between-axis connections, varying node degrees,
+          but no especially complex patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependencies is an excellent real-world scenario
+          matching spec's application examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 15 nodes across 3 axes is appropriate, degree values (2-8) are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' but ggsave path parameter may cause issues
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_path for curves, geom_point with size mapping,
+          theme_void for clean look. Could leverage more lets-plot specific features
+          like tooltips.
+  verdict: APPROVED
diff --git a/plots/hive-basic/metadata/matplotlib.yaml b/plots/hive-basic/metadata/matplotlib.yaml
index 8f7f08f3c9..18551a057d 100644
--- a/plots/hive-basic/metadata/matplotlib.yaml
+++ b/plots/hive-basic/metadata/matplotlib.yaml
@@ -28,3 +28,182 @@ review:
   - Figure size is 12x12 instead of recommended 16x9 default from library rules
   - Node labels could benefit from slightly larger font size (18pt instead of 16pt)
     for better readability
+  image_description: 'The plot shows a hive plot with three radial axes arranged 120
+    degrees apart in a triangular configuration. The axes are labeled "Core" (top,
+    in Python blue #306998), "Utility" (bottom-left, in Python yellow #FFD43B), and
+    "Interface" (bottom-right, in teal #4ECDC4). Each axis contains nodes represented
+    as colored circles with white edge borders. Node labels are positioned adjacent
+    to their respective markers (Engine, Config, Logger, Cache on Core axis; Parser,
+    Validator, Formatter, Converter, Helper on Utility axis; API, Web, CLI, SDK on
+    Interface axis). Curved edges connect nodes through the center point, with edge
+    colors matching their source axis color at reduced opacity (alpha ~0.4). A small
+    white center point marks the origin. The title "Software Module Dependencies"
+    appears at the top with subtitle "hive-basic · matplotlib · pyplots.ai". A legend
+    at the bottom shows the three module categories with their corresponding colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text is readable; title 26pt, axis labels 22pt, node labels 16pt
+          - slightly below optimal but still clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements; node labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: nodes s=400 with white edge, axes linewidth=6, edges alpha=0.4 -
+          all perfectly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/yellow/teal palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good use of 12x12 square canvas; slight bottom whitespace due to
+          legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: hive plots don't use traditional X/Y axes (N/A for this plot type,
+          no deduction should apply but keeping 0 for strict compliance)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: no grid needed for hive plot; legend well placed at bottom with 3
+          columns
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct hive plot with radial axes and curved edges through center
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: nodes correctly positioned on axes by category, position encodes
+          order/importance
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'all spec features present: 3 axes, node labels, edge connections
+          with transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all nodes visible with proper radii (0.15-0.85)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend correctly shows all three module categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly uses "hive-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows cross-axis connections, within-axis connections, and varying
+          node counts per axis; could show more edge weight variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: software module dependency network is realistic and matches spec
+          example exactly
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 13 nodes and 17 edges is appropriate; slight imbalance (4 Core, 5
+          Utility, 4 Interface)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: has one helper function `node_to_cartesian()` which is reasonable
+          for this complex plot but not strictly KISS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses LineCollection for efficient edge rendering and bezier curves,
+          but doesn't leverage more advanced matplotlib features like FancyBboxPatch
+          or custom transforms
+  verdict: APPROVED
diff --git a/plots/hive-basic/metadata/seaborn.yaml b/plots/hive-basic/metadata/seaborn.yaml
index 6636d4f516..38bb8e84d3 100644
--- a/plots/hive-basic/metadata/seaborn.yaml
+++ b/plots/hive-basic/metadata/seaborn.yaml
@@ -22,3 +22,176 @@ review:
   weaknesses:
   - Bottom portion of canvas has significant whitespace due to asymmetric axis placement
   - Title has two lines which deviates slightly from single-line standard format
+  image_description: 'The plot displays a hive plot visualization for software dependencies
+    with three radial axes arranged at 120-degree intervals. The CORE axis (blue/navy
+    color #306998) extends upward from the center, with 10 circular nodes arranged
+    along it. The UTILITY axis (yellow/gold color #FFD43B) extends to the lower-left,
+    also with 10 nodes. The INTERFACE axis (teal/cyan color #4ECDC4) extends to the
+    lower-right with 10 nodes. Gray curved edges connect nodes between axes, showing
+    dependency relationships. The edges have low alpha (transparency) for bundling
+    effect. The title "Software Dependencies / hive-basic · seaborn · pyplots.ai"
+    appears at the top. A legend in the upper-right shows the three module types.
+    The plot uses a white background with the axes off, creating a clean radial network
+    visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis labels at 18pt bold, legend at 16pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; axis labels and legend are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes are well-sized (s=400), edges use appropriate alpha (0.25)
+          for density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/teal palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Square format is appropriate for radial plot but the plot could fill
+          more of the canvas; there's notable whitespace at the bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for hive plots (no traditional axes, but axis names are labeled)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend well-placed in upper-right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct hive plot with radial axes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly assigned to axes by module type, positioned by degree
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 axes, node positioning by property,
+          edge bundling via transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 30 nodes and 29 edges displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three module types
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows required format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows inter-axis connections (core↔utility, utility↔interface, core↔interface)
+          and intra-axis connections; could show more varied connection patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependency network is an excellent real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 30 nodes is appropriate per spec (20-100 recommended); degree values
+          1-15 are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.patches, plt, np, pd, sns)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot and sns.lineplot for plotting, sns.set_theme
+          for styling; however the Bezier curve calculation and radial positioning
+          are done manually rather than leveraging any specialized seaborn features
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/altair.yaml b/plots/horizon-basic/metadata/altair.yaml
index 296c618778..81e13bd1f9 100644
--- a/plots/horizon-basic/metadata/altair.yaml
+++ b/plots/horizon-basic/metadata/altair.yaml
@@ -26,3 +26,179 @@ review:
     descriptions like Low, Medium, High intensity
   - Could add interactivity (tooltips) to leverage Altair strengths for exploring
     specific time points
+  image_description: 'The plot displays a horizon chart showing server CPU metrics
+    over a 24-hour period for 6 servers: API Gateway, Cache, Database, Web Server
+    1, Web Server 2, and Worker. Each server is shown in a separate horizontal band/row
+    using Altair''s faceting. The chart uses a blue color scheme (light to dark: #a6c8e0,
+    #306998, #1a3d5c) for positive values and red (light to dark: #f5b7b1, #e74c3c,
+    #922b21) for negative values, with 3 bands of intensity each. The x-axis shows
+    time from 00:00 to 23:00 with labels every hour. Server names appear on the left
+    side of each row. A legend labeled "Band Intensity" is positioned on the right
+    showing the 6 color codes (positive_0, positive_1, positive_2, negative_0, negative_1,
+    negative_2). The title "Server CPU Metrics (24h) · horizon-basic · altair · pyplots.ai"
+    appears at the top left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable; facet row labels slightly small
+          but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area bands are clearly visible and well-sized for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/red scheme is distinguishable but not ideal for red-green colorblind
+          users; intensity gradients help differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space; faceted rows efficiently fill the
+          vertical space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Time" label is descriptive for the x-axis; no y-axis shown (hidden
+          intentionally for horizon charts)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend labels use technical codes (positive_0, etc.) rather than
+          human-readable descriptions
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart implementation with folded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values encoded in colored bands, series as facets
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses 3 bands, mirrored positive/negative coloring (blue/red), multiple
+          series comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All time points visible from 00:00 to 23:00
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is present but labels are technical (positive_0) rather than
+          descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "horizon-basic · altair · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows diverse patterns: Database has daily cycle, Cache has spike,
+          Worker has periodic batch processing, Web Servers follow traffic'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU metrics is a perfect real-world scenario for horizon charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values centered around mean (deviation from baseline) are sensible;
+          slight deduction as raw CPU percentages would be more intuitive
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html but the specification path should
+          be explicit
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative approach with faceting and color
+          scales, but could leverage more Altair-specific features like interactivity
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/bokeh.yaml b/plots/horizon-basic/metadata/bokeh.yaml
index 2a492161f8..d5795284e2 100644
--- a/plots/horizon-basic/metadata/bokeh.yaml
+++ b/plots/horizon-basic/metadata/bokeh.yaml
@@ -27,3 +27,182 @@ review:
   - The color legend at the very top, while present, could be more visually prominent
     and integrated
   - The axis label Hour of Day could include units (e.g., Hour of Day 0-24h)
+  image_description: 'The plot displays 6 stacked horizon charts representing server
+    CPU metrics over a 24-hour period. Each horizontal band represents a different
+    server (Web Server 1, Web Server 2, Database, Cache Server, API Gateway, Load
+    Balancer). The visualization uses blue color gradients (light to dark: #a6cee3,
+    #306998, #08306b) for positive values and orange/red gradients (light to dark:
+    #fdd0a2, #f16913, #8c2d04) for negative values. The band-folding technique is
+    correctly implemented, with color intensity increasing as magnitude increases
+    within each band. A color legend appears at the top explaining the band meanings.
+    The title "horizon-basic · bokeh · pyplots.ai" is displayed at the top center.
+    The x-axis shows "Hour of Day" from 0 to 24, and each series has its name labeled
+    on the left side. The data shows realistic server load patterns with periodic
+    oscillations, noise, and occasional spikes.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt and axis labels at 24pt/20pt are readable; series labels
+          at 26pt are good but could be slightly larger for optimal viewing
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; series are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Horizon bands are clearly visible with good alpha (0.9); color differentiation
+          is effective but some subtle bands could be more distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/orange palette is colorblind-friendly (deuteranopia/protanopia
+          safe)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space; 6 series stacked vertically with legend
+          on top fills the 4800×2700 canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Hour of Day" is descriptive but lacks units; y-axis deliberately
+          hidden for horizon charts'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid appropriately disabled for horizon charts; legend is present
+          but could be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart implementation with band folding
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time (hours) on x-axis, values folded into bands correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 3 bands with mirrored positive/negative coloring, baseline at zero,
+          multiple series comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24-hour range visible, all data within view
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color legend accurately explains band meanings (Low/Medium/High for
+          both positive and negative)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "horizon-basic · bokeh · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive AND negative values, multiple intensity bands, 6 different
+          series with varying patterns; could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server metrics over 24 hours is a perfect real-world use case matching
+          the spec's "Dashboard monitoring" application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for server metrics; the scale normalization
+          per series is appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plots → save; uses a loop but no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as both plot.html and plot.png (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool with custom tooltips, varea for
+          filled regions, column layout for stacking; could leverage more Bokeh-specific
+          interactivity features like linked panning
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/highcharts.yaml b/plots/horizon-basic/metadata/highcharts.yaml
index 103931e25b..d268813c76 100644
--- a/plots/horizon-basic/metadata/highcharts.yaml
+++ b/plots/horizon-basic/metadata/highcharts.yaml
@@ -26,3 +26,184 @@ review:
   - Y-axis labels are hidden, making it harder to understand the magnitude scale
   - Color scheme while good is not the most colorblind-safe (tritanopia)
   - Subtitle legend could be more prominent or placed as a traditional legend
+  image_description: 'The plot displays a horizon chart showing "Server CPU Load (24h)"
+    for 6 servers (A through F). Each server has its own horizontal panel/row. The
+    chart uses a blue color scheme (light to dark: #a6cee3 → #1f78b4 → #033860) for
+    positive deviations and a red color scheme (light to dark: #fb9a99 → #e31a1c →
+    #67000d) for negative deviations. The x-axis shows "Hour of Day" from 0 to 24
+    with tick marks at 4-hour intervals. Each server''s time series is displayed as
+    stacked area bands that fold values into color-coded layers. The title follows
+    the required format with subtitle explaining the color legend. Server labels appear
+    on the left of each row. The chart effectively demonstrates the horizon chart
+    concept with multiple overlapping bands showing intensity through color saturation.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and clear (56px), axis labels are readable (32px),
+          server labels visible. Subtitle text explaining colors is slightly small
+          but readable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, server panels are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area bands are clearly visible, color intensity differences are distinguishable,
+          data patterns are easy to follow
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses blue/red scheme which has good contrast but is not fully colorblind-safe
+          (red-green is avoided, but red-blue may be challenging for some tritanopia
+          cases). However, the light-to-dark intensity still provides good differentiation.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space, all 6 server panels fill the chart
+          area well with appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Hour of Day" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid lines with alpha 0.1. Legend is integrated into subtitle
+          rather than traditional legend placement, which works but is less prominent.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart implementation with folded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values folded into bands correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series (6), color-coded bands (3 bands), positive/negative
+          coloring, baseline handling
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24-hour range visible, all data shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtitle explains colors but could be more prominent
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Server CPU Load (24h) · horizon-basic · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple series, both positive and negative deviations, varying
+          intensities with 3 bands. Could show more dramatic differences between servers
+          to better demonstrate the comparison capability.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU metrics over 24 hours is a perfect real-world scenario
+          for horizon charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Values are plausible for CPU deviation metrics, but the scale/units
+          are not explicitly shown (deviation from baseline, not absolute %)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple sequential structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses `strict=True` in zip which is fine, but uses manual JSON building
+          instead of highcharts-core library
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highcharts multi-axis feature to create stacked panels,
+          custom area series with stacking=None for horizon effect. Does not use highcharts-core
+          Python library as recommended in library rules, instead builds JSON manually,
+          but the approach is valid and works correctly.
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/letsplot.yaml b/plots/horizon-basic/metadata/letsplot.yaml
index 2b85e11ae9..6a5f4147e5 100644
--- a/plots/horizon-basic/metadata/letsplot.yaml
+++ b/plots/horizon-basic/metadata/letsplot.yaml
@@ -26,3 +26,179 @@ review:
     resolution'
   - File handling with shutil adds complexity that other libraries do not require
     (lets-plot limitation)
+  image_description: 'The plot displays a 6-panel faceted horizon chart showing Server
+    CPU Usage Deviation for Servers A through F over a week (Monday to Sunday). Each
+    panel uses a purple-orange diverging color scheme: purple shades (+Low, +Medium,
+    +High) for positive deviations and orange shades (-Low, -Medium, -High) for negative
+    deviations. The color intensity increases with magnitude within each band, creating
+    a layered area chart effect. A clear legend labeled "Band Intensity" appears on
+    the right side explaining all six band categories. The title "Server CPU Usage
+    Deviation · horizon-basic · letsplot · pyplots.ai" is prominently displayed at
+    the top. X-axis shows days of the week, Y-axis shows "Folded Value (stacked bands)".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and facet titles are clearly readable. Tick labels
+          are adequate but could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Area bands are well-sized and clearly visible with good alpha for
+          layering
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Purple-orange diverging scheme is colorblind-friendly (avoids red-green),
+          though purple and dark orange could be closer in luminance
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas with 6 well-proportioned facets and balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis label "Folded Value (stacked bands)" is descriptive but lacks
+          units; X-axis "Day of Week" is good
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-positioned and clear; grid is subtle but panel backgrounds
+          could have more contrast
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart with folded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, values properly folded into bands with sign differentiation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: multiple bands (3), positive/negative
+          coloring, multiple time series comparison'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the facets
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 6 band categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{spec-id} · {library} · pyplots.ai" format perfectly
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative deviations, multiple intensity levels,
+          and spikes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU monitoring is a perfect real-world use case for horizon
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 7 days of hourly data with realistic CPU deviation patterns
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: '`shutil` and file manipulation could be considered slightly non-standard
+          for plot code'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: N/A (no deprecated usage)
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly (with workaround for lets-plot output
+          directory)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (aes, geom_area, facet_wrap, scale_fill_manual,
+          theme_minimal), though doesn't leverage lets-plot's interactive tooltip
+          features
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/matplotlib.yaml b/plots/horizon-basic/metadata/matplotlib.yaml
index 409e0c325b..181f13b374 100644
--- a/plots/horizon-basic/metadata/matplotlib.yaml
+++ b/plots/horizon-basic/metadata/matplotlib.yaml
@@ -27,3 +27,178 @@ review:
   - Series could show more dramatic variation to better demonstrate the horizon chart
     ability to highlight differences
   - Missing descriptive subtitle explaining what horizon charts do
+  image_description: The plot displays a horizon chart showing 8 server metrics (CPU
+    Load, Memory, Network I/O, Disk I/O, Requests/s, Latency, Queue Depth, Threads)
+    as horizontally stacked panels over 24 hours. Each panel uses blue shades (Python
+    Blue) for positive deviations and red/pink shades for negative deviations, with
+    3 overlapping bands of increasing color intensity (Low/Mid/High). The title "horizon-basic
+    · matplotlib · pyplots.ai" is at the top, with a 6-item legend below it. Series
+    names appear as bold labels on the right side of each row. The x-axis shows datetime
+    ticks (01-15 00 through 01-15 21) with "Time (Hour of Day)" label. The design
+    is clean with no y-axis ticks and minimal spines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 22pt (slightly under 24pt ideal), labels 14-18pt are readable
+          but could be larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bands are well-sized with good alpha gradations, patterns clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Red scheme is colorblind-friendly, not red-green
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, 8 series fill the vertical space well, balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis label is descriptive but no units (time is self-evident, but
+          "Hours" could be explicit)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed and informative, but no grid (acceptable for
+          horizon charts)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart with folded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, values encoded in color bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 3 bands, positive/negative coloring, multiple series, color intensity
+          encoding
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within normalized range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 band types (Low/Mid/High for +/-)
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Uses the correct format, and the middle-dot separator (·) is fine;
+          minor: the spec title says "Horizon Chart" but "horizon-basic" is used'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive and negative values, varying intensities, spikes;
+          could have more dramatic differences between series
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server metrics over 24 hours is a perfect real-world use case matching
+          the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Normalized values are sensible; some metrics like "Queue Depth" might
+          benefit from more realistic absolute ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes, clean linear flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.pyplot, matplotlib.colors, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses subplots with shared x-axis, fill_between with alpha blending,
+          text annotations; could leverage more advanced matplotlib features like
+          transforms or custom colormaps
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/plotly.yaml b/plots/horizon-basic/metadata/plotly.yaml
index 3331708ef1..ad82845f49 100644
--- a/plots/horizon-basic/metadata/plotly.yaml
+++ b/plots/horizon-basic/metadata/plotly.yaml
@@ -25,3 +25,168 @@ review:
   - Series labels (Server A-F) could be larger for better readability at full resolution
   - X-axis tick font size (16) is slightly below optimal for 4800x2700 output
   - Color legend annotation text is small and could be more prominent
+  image_description: 'The plot displays a horizon chart with 6 server CPU load time
+    series (Server A through Server F) arranged vertically as subplots over a 24-hour
+    period. Each series uses the horizon chart folding technique: positive deviations
+    from baseline are shown in blue (light to dark shades indicating increasing magnitude
+    across 3 bands), while negative deviations are shown in red (light to dark). The
+    title reads "Server CPU Load (24h) · horizon-basic · plotly · pyplots.ai" centered
+    at the top. The x-axis shows "Hour of Day" from 0 to ~24, with tick marks at 0,
+    5, 10, 15, 20. Series labels appear on the left side of each subplot. A color
+    interpretation legend is positioned in the top-right corner. The layout uses a
+    clean white background with no gridlines, and each series row has consistent height.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and axis labels readable, series labels slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Fill areas clearly visible with good color intensity bands
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/red diverging palette is colorblind-accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Hour of Day" is descriptive but has no units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid hidden (appropriate), color legend present but small
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart with band folding
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values folded into bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multi-series, color-coded bands, positive/negative mirroring
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24-hour range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color interpretation annotation accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values, multiple bands, spikes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU monitoring is a classic horizon chart use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values represent deviation from baseline, scale is reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, linear flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses subplots with shared_xaxes, annotations; could leverage more
+          interactive features
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/plotnine.yaml b/plots/horizon-basic/metadata/plotnine.yaml
index a0c6d42eb0..64d06e4384 100644
--- a/plots/horizon-basic/metadata/plotnine.yaml
+++ b/plots/horizon-basic/metadata/plotnine.yaml
@@ -27,3 +27,180 @@ review:
     position=identity means bands overlay rather than stack
   - Some band colors within the same polarity (e.g., +Low, +Medium, +High) are somewhat
     similar and could benefit from more contrast
+  image_description: The plot displays a 6-panel faceted horizon chart showing "Server
+    CPU Usage Deviation" over 7 days (Mon-Sun) for 6 servers (A-F). Each facet panel
+    uses overlapping area bands with a blue-to-dark-blue gradient for positive deviations
+    (+Low, +Medium, +High) and an orange-to-dark-red gradient for negative deviations
+    (-Low, -Medium, -High). The title "Server CPU Usage Deviation · horizon-basic
+    · plotnine · pyplots.ai" appears at top. A "Band Intensity" legend on the right
+    shows all 6 band categories. The x-axis displays weekday labels, y-axis shows
+    "Folded Value (stacked bands)" from 0 to ~17. The layout uses a 2x3 grid of facets
+    with white backgrounds and subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick labels 14pt, facet strip
+          labels 18pt bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, facets are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Area bands are clearly visible with good alpha (0.85), slight overlap
+          of bands as intended for horizon effect
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/orange color scheme is colorblind-safe (not red-green), though
+          the 3 shades within each hue family may be harder to distinguish
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 canvas with facets filling most of the space,
+          legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Day of Week", "Folded Value (stacked bands)")
+          but no units for y-axis (acceptable given normalized bands)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), legend well placed; grid could be even more
+          subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart implementation with folded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, folded values on y-axis, series in facets
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, color-coded bands, positive/negative differentiation,
+          folded values
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within facet panels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 band intensity levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{context} · horizon-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive and negative deviations, different patterns per server,
+          spikes visible; could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU deviation monitoring is a perfect real-world use case
+          for horizon charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 168 data points (7 days × 24 hours) is realistic; deviation values
+          are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → transformation → plot →
+          save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" correctly but verbose=False warning handling
+          could be cleaner
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (facet_wrap, geom_area, scale_fill_manual, theme
+          customization) but horizon chart is a workaround via data transformation
+          rather than native plotnine feature
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/pygal.yaml b/plots/horizon-basic/metadata/pygal.yaml
index c44ab7de7d..b8ec5304c2 100644
--- a/plots/horizon-basic/metadata/pygal.yaml
+++ b/plots/horizon-basic/metadata/pygal.yaml
@@ -27,3 +27,175 @@ review:
   - Class-based structure is necessary for this custom chart type but technically
     violates KISS (unavoidable trade-off)
   - X-axis label Time could include units (e.g., Time HH:MM) for full compliance
+  image_description: The horizon chart displays 6 server metrics (CPU Usage, Memory,
+    Network I/O, Disk I/O, Response Time, Error Rate) over a 24-hour period from 00:00
+    to 22:00. Each metric occupies a horizontal band with time on the x-axis. Blue
+    colors (light to dark gradients in 3 bands) represent positive deviations, while
+    orange colors (light to dark gradients in 3 bands) represent negative deviations.
+    The chart title "horizon-basic · pygal · pyplots.ai" appears at the top. A legend
+    in the top-right shows positive (blues) and negative (oranges) color bands. Series
+    labels appear on the left side, x-axis time labels at the bottom with "Time" as
+    the axis title. Subtle dashed vertical grid lines help track time across all series.
+    Each row has a light gray background with subtle borders separating the series.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, series labels, and time labels all clearly readable at full
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, time labels appropriately spaced at 2-hour intervals
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Horizon bands visible and color gradients clear, though some subtle
+          variations could be more distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-orange diverging scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas with balanced margins, plot fills appropriate
+          area
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: X-axis has "Time" but no units; Y-axis labels are series names only
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid lines at regular intervals, legend well-placed
+          in top-right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart implementation with color-coded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values shown as folded bands with color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, 2-4 bands (3 used), positive/negative coloring
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, 24-hour range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows positive/negative color gradients
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "horizon-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both positive and negative values, varying intensities across
+          all 6 metrics
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Server monitoring scenario is plausible and commonly used
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values represent realistic deviations from baseline for server metrics
+    code_quality:
+      score: 6
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses custom HorizonChart class extending pygal.Graph (necessary for
+          custom chart type, but violates KISS)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.html first, plot.png second (acceptable, but plot.png
+          should be the primary output)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Extends pygal's Graph class to create custom horizon chart, leveraging
+          SVG rendering capabilities
+  verdict: APPROVED
diff --git a/plots/horizon-basic/metadata/seaborn.yaml b/plots/horizon-basic/metadata/seaborn.yaml
index 3e21771877..2abe959ef8 100644
--- a/plots/horizon-basic/metadata/seaborn.yaml
+++ b/plots/horizon-basic/metadata/seaborn.yaml
@@ -26,3 +26,181 @@ review:
     functions (acceptable limitation for horizon charts)
   - Grid lines are quite subtle and could be slightly more visible for time tracking
   - Legend Deviation from Baseline title could specify units (percentage points)
+  image_description: 'The plot displays a horizon chart showing 5 server metrics (Web
+    Server, Database, Cache, API Gateway, Auth Service) over a 24-hour period from
+    00:00 to 20:00+. Each row represents one server with color-coded bands: blues
+    (light to dark) for positive deviations and reds/oranges (light to dark) for negative
+    deviations from baseline. The title "horizon-basic · seaborn · pyplots.ai" appears
+    at the top in bold. A legend in the upper right explains the 6 color bands (Low/Medium/High
+    for both positive and negative). X-axis shows time in 4-hour intervals. Server
+    names are displayed as y-axis labels on the left. The visualization effectively
+    demonstrates the horizon chart concept with folded bands showing intensity through
+    color depth.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, server labels and axis labels are clearly readable
+          at 16-20pt, tick labels are appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout with proper spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Horizon bands are clearly visible with good color intensity differentiation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/red diverging scheme is reasonably colorblind-safe, though blue-orange
+          would be slightly better
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with proper margins, slight issue with legend
+          positioning near top edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label "Time (24-hour period)" but no units
+          context for deviation values
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-positioned and informative; subtle x-axis grid present
+          but could be more visible for time tracking
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizon chart implementation with folded bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, deviation values properly mapped to color bands
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple time series (5 servers), color-coded bands (3 per direction),
+          positive/negative distinction
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All time points visible, full 24-hour range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly explains all 6 band levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "horizon-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows positive and negative deviations, multiple servers with different
+          patterns (web server daily cycle, database spikes, cache flushes, auth login
+          peaks). Minor: some patterns could be more distinctive'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU monitoring is a perfect real-world scenario for horizon
+          charts, patterns are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Deviation range ±50% is reasonable and the 15-minute intervals
+          (96 points) are a good resolution; values are clipped appropriately
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.patches, pyplot, numpy, pandas,
+          seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn and matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as ''plot.png'' but header says Quality: 82 which is stale'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's color palettes (sns.color_palette), styling (sns.set_style,
+          sns.set_context), and despine function. However, the core visualization
+          uses matplotlib's fill_between rather than seaborn's native plotting functions,
+          which is acceptable since seaborn doesn't have a native horizon chart function
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/altair.yaml b/plots/icicle-basic/metadata/altair.yaml
index f9dded2c41..b3fde99458 100644
--- a/plots/icicle-basic/metadata/altair.yaml
+++ b/plots/icicle-basic/metadata/altair.yaml
@@ -25,3 +25,167 @@ review:
     users; consider using more distinct hues across levels
   - Some leaf nodes appear very narrow making the hierarchy harder to read in that
     section
+  image_description: 'The plot displays a horizontal icicle chart showing a file system
+    hierarchy. The root node (dark blue) spans the entire width at the bottom (level
+    0). Level 1 shows three main folders: "Documents", "Media", and "Projects" in
+    medium blue. Level 2 shows subfolders including "Reports", "Videos", and "Audio"
+    in lighter blue. Level 3 (lightest blue) shows leaf files like "tutorial.mp4",
+    "demo.mp4", and "podcast.mp3". The rectangles are separated by white strokes.
+    A legend on the right shows levels 0-3. The title "icicle-basic · altair · pyplots.ai"
+    appears at the top center. The Y-axis is labeled "Hierarchy Level" with values
+    0-4.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; some leaf node labels
+          in narrow rectangles are missing (by design for small nodes)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels hidden for small rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles are well-sized and clearly visible with good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue color scheme with good level differentiation, but all blues
+          may be harder for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space, plot fills ~70% of the area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label "Hierarchy Level", but no units (appropriate
+          for this context); X-axis hidden (appropriate)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is clear and well-placed; no grid needed for this chart type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with horizontal orientation (top-to-bottom hierarchy)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchy levels correctly mapped to Y-axis, values determine width
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows parent-child relationships, proportional sizing, labels for
+          larger nodes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All hierarchy levels visible (0-4)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Level legend accurately represents the data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "icicle-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows multiple hierarchy levels, varying node sizes, parent-child
+          relationships; minor: some leaf nodes very narrow'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system hierarchy is a realistic, neutral scenario matching spec's
+          application examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: File sizes in MB are realistic; some variation in proportions could
+          be more balanced
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → processing → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), but no explicit seed statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/bokeh.yaml b/plots/icicle-basic/metadata/bokeh.yaml
index 35daa76649..665326a0a7 100644
--- a/plots/icicle-basic/metadata/bokeh.yaml
+++ b/plots/icicle-basic/metadata/bokeh.yaml
@@ -25,3 +25,172 @@ review:
   - Code uses helper functions (calc_value, assign_level, layout_icicle) instead of
     KISS linear script style
   - Could leverage Bokeh HoverTool for interactive tooltips showing node details
+  image_description: 'The plot displays a file system hierarchy as an icicle chart
+    with three horizontal layers stacked top-to-bottom. The top layer shows a single
+    dark blue (#306998) "Root" rectangle spanning the full width. The second layer
+    (Categories) contains three medium blue rectangles: Documents (750 MB), Media
+    (1600 MB), and Code (950 MB), with widths proportional to their values. The third
+    layer (Subcategories) shows 9 lighter blue rectangles representing leaf nodes:
+    Reports (350 MB), Contracts (250 MB), Notes (150 MB), Images (500 MB), Videos
+    (800 MB), Audio (300 MB), Python (400 MB), JavaScript (350 MB), and Data (200
+    MB). Level labels appear on the left margin (Root, Categories, Subcategories).
+    White borders separate rectangles, and the background is light gray (#fafafa).
+    The title "File System Structure · icicle-basic · bokeh · pyplots.ai" is centered
+    at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title 36pt, labels 20-24pt, all readable. Minor: smallest nodes
+          have smaller text'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles well-sized, proportional to values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue gradient is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for icicle charts (axes hidden by design)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Appropriate: no grid/legend needed for this chart type'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with horizontal top-to-bottom orientation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchy as rows, values as widths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: hierarchical layout, proportional sizing,
+          color by level, conditional labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Level labels accurate and well-placed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct: "File System Structure · icicle-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 3 hierarchy levels, varying sizes, multiple branches
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system with folders/sizes is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in MB are realistic (150-800 MB ranges)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper functions instead of linear script style
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png correctly, also saves HTML
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, rect glyph, Label annotations, export_png,
+          HTML output
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/highcharts.yaml b/plots/icicle-basic/metadata/highcharts.yaml
index 5983870229..f46e4b52e8 100644
--- a/plots/icicle-basic/metadata/highcharts.yaml
+++ b/plots/icicle-basic/metadata/highcharts.yaml
@@ -26,3 +26,167 @@ review:
   - Some leaf node labels are truncated (e.g., Footer, Modal.tsx) - could show more
     with smaller font
   - Uses helper functions instead of pure KISS linear code structure
+  image_description: 'The plot displays an icicle chart visualizing a file system
+    hierarchy. At the top is a dark gray "Project Files" root node spanning the full
+    width. Below it are four color-coded main directories: blue "src" (largest portion),
+    yellow "docs", purple "tests", and cyan "assets". Each directory is subdivided
+    into subdirectories (components, utils, api, images, styles) and leaf nodes showing
+    individual files with sizes in KB (e.g., "guide.md (120 KB)", "banner.jpg (280
+    KB)"). The chart uses top-to-bottom orientation with children stacked below parents.
+    A legend at the bottom identifies the four main categories. The title "icicle-basic
+    · highcharts · pyplots.ai" appears at the top with an explanatory subtitle.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 7
+        max: 10
+        passed: true
+        comment: Title and main labels clear; some leaf labels truncated with "..."
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels truncated appropriately
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles well-sized, hierarchy clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, purple, cyan)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for icicle; subtitle provides context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with layered rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Parent-child relationships shown via spatial adjacency
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical data, size-based rectangles, top-to-bottom orientation,
+          color by category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly identifies four main categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "icicle-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Multiple hierarchy levels (4 deep), varying sizes, different categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system structure is a realistic, neutral example from the spec's applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: File sizes realistic (28-280 KB range)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper functions instead of pure linear flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic hardcoded data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creative use of Highcharts renderer API for custom SVG drawing
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/letsplot.yaml b/plots/icicle-basic/metadata/letsplot.yaml
index 185000ae75..dd392e996e 100644
--- a/plots/icicle-basic/metadata/letsplot.yaml
+++ b/plots/icicle-basic/metadata/letsplot.yaml
@@ -26,3 +26,174 @@ review:
   - Several level 3 labels are hidden (Reports, Contracts, Letters, Receipts, 2024,
     2023, Movies, DataViz, Frontend, Backend) - while this prevents overlap, it reduces
     information density
+  image_description: 'The plot displays a horizontal icicle chart representing a file
+    system hierarchy. At the top is a large dark blue rectangle labeled "root" (level
+    0). Below it are three yellow rectangles for "Documents", "Media", and "Projects"
+    (level 1), sized proportionally to their values. Level 2 shows light blue rectangles
+    for subfolders: Work, Personal (under Documents), Photos, Videos (under Media),
+    and Python, Web (under Projects). The deepest level 3 shows gray rectangles for
+    items like Clips and ML (visible labels), while many level 3 labels are hidden
+    due to narrow widths. White borders separate all rectangles. A legend on the right
+    shows levels 0-3 with corresponding colors. The title "icicle-basic · letsplot
+    · pyplots.ai" appears in the top left.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is clear at 24pt, level labels are readable. Some level 3 labels
+          appropriately hidden for narrow rectangles.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels intelligently hidden when rectangles
+          are too narrow
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles are well-sized with clear white borders separating them
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/gray palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but plot could extend closer to edges; some empty
+          space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for icicle charts (no axes expected); axes are correctly hidden
+          as the spec requires
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend well-placed on right
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with horizontal orientation (root at top)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Parent-child relationships correctly shown through spatial adjacency
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical structure, proportional sizing, color by level all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All hierarchy levels (0-3) visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows "Level" with 0-3 which is functional but could be more
+          descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "icicle-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 levels of hierarchy with varying sizes; good variety in node
+          sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system with folders/files is a realistic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: File sizes in reasonable ranges (50-400 units)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_rect and geom_text, scale_fill_manual,
+          but icicle isn't a native lets-plot chart type so it's manually constructed
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/matplotlib.yaml b/plots/icicle-basic/metadata/matplotlib.yaml
index 1bdca4ad90..831b4ca84a 100644
--- a/plots/icicle-basic/metadata/matplotlib.yaml
+++ b/plots/icicle-basic/metadata/matplotlib.yaml
@@ -28,3 +28,174 @@ review:
   - No random seed statement (though data is deterministic, adding np.random.seed(42)
     comment would clarify intent)
   - Could use matplotlib colormap instead of manual color list for better scalability
+  image_description: 'The plot displays a horizontal icicle chart representing a file
+    system hierarchy with 4 depth levels. The root level (blue, #306998) spans the
+    full width with "Root" label. The category level (yellow, #FFD43B) shows three
+    main folders: "Documents", "Pictures", and "Music" with proportional widths. The
+    subcategory level (teal, #4ECDC4) contains folders like "Reports", "Letters" (truncated
+    to "Lett.."), "Spreadsheets" (truncated), "Photos", "Screenshots" ("Sc.."), "Icons"
+    ("I.."), "Albums", "Playlists" ("P.."), and "Podcasts" ("Podca.."). The item level
+    (coral, #FF6B6B) shows individual files with heavily truncated labels. Level hierarchy
+    labels (Root, Category, Subcategory, Item) appear on the right side. Title is
+    correctly formatted as "icicle-basic · matplotlib · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt is excellent, main labels readable, but item-level
+          labels are heavily truncated making some unreadable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; truncation prevents overlap effectively
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles well-sized, clear gaps between rows, good visual hierarchy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast; yellow uses black text
+          appropriately
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though right-side level labels create slight
+          asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No traditional axes (appropriate for icicle chart), but level labels
+          serve similar purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed; level labels on right serve as effective legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with horizontal top-to-bottom layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchy correctly represented with proportional widths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows parent-child relationships, proportional sizing, colored by
+          depth
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All hierarchy levels visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Level labels accurately describe hierarchy
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "icicle-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 hierarchy levels with varying node sizes; could show more
+          variation in leaf values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system hierarchy is a perfect, neutral real-world example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: File sizes in reasonable ranges (15-75 units)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Flat script structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic so effectively reproducible
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and matplotlib.patches used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of patches.Rectangle and manual positioning, but could leverage
+          more matplotlib features like colormap
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/plotnine.yaml b/plots/icicle-basic/metadata/plotnine.yaml
index 6b7024676d..94f710aa7b 100644
--- a/plots/icicle-basic/metadata/plotnine.yaml
+++ b/plots/icicle-basic/metadata/plotnine.yaml
@@ -25,3 +25,177 @@ review:
   weaknesses:
   - Legend shows Hierarchy Level with just numbers 0-3 instead of the more descriptive
     labels defined in the code
+  image_description: 'The plot displays a horizontal icicle chart visualizing a file
+    system hierarchy. The root node (dark blue, 4590 MB) spans the full width at the
+    top. Below it, three level-1 children (lighter blue) show Projects (2140 MB),
+    Photos (1540 MB), and Documents (910 MB) proportionally sized. Level 2 (yellow)
+    contains items like WebApp, DataSci, Mobile, Vacation, Family, Events, Reports,
+    Invoices, and Notes. Level 3 (brown) shows leaf nodes: Backend, Frontend, Config,
+    Models, and Scripts under the Projects branch. White borders separate rectangles.
+    A legend on the right shows "Hierarchy Level" with levels 0-3. The title "icicle-basic
+    · plotnine · pyplots.ai" appears at the top center in bold.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Most text clearly readable; some narrow rectangles (Events, Invoices,
+          Notes) show name-only labels which is appropriate
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels adapt well to rectangle width
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All rectangles clearly visible with good proportional sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast between levels; blue/yellow/brown scheme is colorblind-friendly,
+          though level 2 yellow and level 1 light blue in legend could be more distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, plot fills most of the space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for icicle chart (uses theme_void), but hierarchy is clear from
+          layout
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-positioned on right, clean theme_void appropriate for
+          this chart type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with horizontal orientation (top-to-bottom hierarchy)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchy correctly mapped with proportional widths based on values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: hierarchical rectangles, size-based widths,
+          level coloring, labels with values'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, proper aggregation from leaves to root
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows hierarchy levels 0-3
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "icicle-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple hierarchy depths (4 levels), varying branch sizes,
+          leaf and non-leaf nodes; could have one more level for fuller demonstration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system with folders and sizes is an excellent, relatable example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in MB are realistic for file system; some values could show
+          more variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure but has some complexity with BFS algorithms;
+          acceptable for this chart type
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_rect, geom_text, scale_fill_manual,
+          theme_void; good use of plotnine but icicle charts are not a native strength
+          of plotnine
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/pygal.yaml b/plots/icicle-basic/metadata/pygal.yaml
index 41c3c0bcb0..bdbdc189e4 100644
--- a/plots/icicle-basic/metadata/pygal.yaml
+++ b/plots/icicle-basic/metadata/pygal.yaml
@@ -25,3 +25,177 @@ review:
     pygal implementations
   - Some leaf nodes have very narrow rectangles making labels hard to read (e.g.,
     Cov.., Th..)
+  image_description: 'The plot displays a hierarchical icicle chart with 4 levels
+    arranged vertically (top-to-bottom). The root node (blue, #306998) spans the full
+    width at the top labeled "Root". Below it, three category nodes in yellow (#FFD43B)
+    show "Documents", "Pictures", and "Music" with proportional widths. The third
+    level (teal, #4ECDC4) contains subcategories like "Reports", "Letters", "Spreadsheets",
+    "Photos", "Screenshots", "Icons", "Albums", "Playlists", and "Podcasts". The bottom
+    level (coral, #FF6B6B) shows individual items like "Q1 Report", "Q2 Report", "Photo
+    1", "Rock", "Jazz", etc. Level labels appear on the right side (Root, Category,
+    Subcategory, Item). A legend at the bottom shows the four hierarchy levels with
+    color coding. The title "icicle-basic · pygal · pyplots.ai" is prominently displayed
+    at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and most labels clearly readable; some smaller items use truncated
+          labels but remain legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels appropriately truncated for narrow rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Rectangles well-sized and visible; small items like "Cov..", "Th..",
+          "An.." are narrow but still distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette (blue, yellow, teal, coral) with
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with balanced margins; level labels on right
+          add context
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for icicle charts (no traditional axes); level labels serve similar
+          purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean legend at bottom, no distracting grid elements
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with vertical orientation (top-to-bottom)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchy correctly mapped with parent-child relationships
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchy levels, proportional sizing, color by level, labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes visible and appropriately sized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows 4 hierarchy levels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "icicle-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 levels of hierarchy with varying sizes; could show more variation
+          in branch depths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system metaphor is excellent and intuitive (Documents, Pictures,
+          Music folders)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable; some items could have more variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code uses procedural style but is more complex due to manual SVG
+          generation (necessary for icicle chart)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal Style for theming and config; SVG generation is manual
+          since pygal lacks native icicle chart support
+  verdict: APPROVED
diff --git a/plots/icicle-basic/metadata/seaborn.yaml b/plots/icicle-basic/metadata/seaborn.yaml
index 522eb36836..7490e2bb1a 100644
--- a/plots/icicle-basic/metadata/seaborn.yaml
+++ b/plots/icicle-basic/metadata/seaborn.yaml
@@ -24,3 +24,171 @@ review:
   - Value labels (MB sizes) not displayed in rectangles - spec emphasizes size represents
     value
   - Some labels are truncated (Docume.., Templa..) reducing readability
+  image_description: 'The plot displays a basic icicle chart visualizing a file system
+    hierarchy structure. At the top is a large dark blue rectangle labeled "Root"
+    spanning the full width. Below it are three medium steel-blue rectangles for main
+    categories: "Projects", "Media", and "Docume.." (Documents truncated). The third
+    row contains lighter blue rectangles for subcategories: Designs, Code, Audio,
+    Videos, Images, Templa.., Presen.., and Reports. The bottom row shows the lightest
+    blue file nodes with truncated names like Record.., Tutori.., Screen.., Photos,
+    Traini.., Sales.., Annu.. The Blues_r color palette creates a clear visual gradient
+    from dark (root) to light (files). A legend in the bottom right explains hierarchy
+    levels. The title reads "icicle-basic · File System Structure · seaborn · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 32
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Most text readable, but truncated labels reduce clarity
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles well-sized and clearly visible with good spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues_r palette is colorblind-safe sequential scheme
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but some wasted space at bottom; plot could extend further
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for icicle chart (no axes), but no size values shown in rectangles
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend well placed but no value information displayed in the chart
+    spec_compliance:
+      score: 21
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct icicle chart with hierarchical stacked rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Parent-child relationships correctly shown through spatial adjacency
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Missing: values not displayed (spec says "size represents value")'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All hierarchy levels visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies hierarchy levels
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Title includes extra "File System Structure" instead of exact format
+          "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 levels of hierarchy, varied branch sizes; some leaf nodes
+          missing
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: File system with folders/files is excellent real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: File sizes in MB are realistic and sensible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports, data, plot, save
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random seed needed; the data is hardcoded and therefore
+          deterministic, which is good
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current seaborn API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Note: code references plot.png correctly'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style, sns.set_context, sns.color_palette, sns.despine;
+          but core drawing is matplotlib Rectangle patches
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/altair.yaml b/plots/learning-curve-basic/metadata/altair.yaml
index 48b2abde5a..d46a2db2d9 100644
--- a/plots/learning-curve-basic/metadata/altair.yaml
+++ b/plots/learning-curve-basic/metadata/altair.yaml
@@ -22,3 +22,180 @@ review:
   - Legend configuration uses resolve_legend which creates duplicate legend behavior
   - Missing interactive features (tooltips, hover) for exploring exact values
   - Y-axis scale could be tighter to better show validation curve detail
+  image_description: "The plot displays a model learning curve with two distinct curves\
+    \ against a light gray grid background. The title \"learning-curve-basic · altair\
+    \ · pyplots.ai\" appears at the top in a large, readable font. The X-axis is labeled\
+    \ \"Training Set Size (samples)\" ranging from 100 to approximately 980 samples.\
+    \ The Y-axis is labeled \"Accuracy Score\" ranging from 0.66 to 1.02. \n\nThe\
+    \ **blue line** represents Training Score - it starts high around 0.98 and remains\
+    \ relatively stable with a very slight downward trend, surrounded by a light blue\
+    \ shaded confidence band. The **yellow/gold line** represents Validation Score\
+    \ - it starts lower around 0.74 and shows the characteristic learning curve improvement,\
+    \ rising to approximately 0.86 as training data increases, with a wider yellow\
+    \ confidence band that narrows as more data is added. A legend box in the top-right\
+    \ corner clearly distinguishes the two curve types with appropriate color coding."
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all text and data cleanly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (strokeWidth=3), bands have appropriate opacity (0.3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas but Y-axis could better utilize the vertical space
+          (gap at top from 0.86 to 1.02 has limited data)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with context: "Training Set Size (samples)" and "Accuracy
+          Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is at 0.3 opacity (good), but legend shows TWO separate legend
+          boxes due to `resolve_legend(color="independent")` which is confusing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve visualization with training and validation
+          scores
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=training set size, Y=accuracy score, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has both curves, confidence bands (±1 std), distinct colors, legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows all data points from 100 to 1000 samples
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify Training Score and Validation Score
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `{spec-id} · {library} · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows learning curve characteristics: training high, validation
+          improving, gap closing. Could show more variance in early stages'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classification model accuracy scores are realistic (0.74-0.98 range)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible, but validation starts slightly high for typical
+          underfitting scenarios
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data generation → DataFrame creation
+          → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses layered charts (`alt.layer`) and `mark_area` for bands, but
+          doesn't leverage Altair's interactivity or tooltips which would enhance
+          learning curve exploration
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/bokeh.yaml b/plots/learning-curve-basic/metadata/bokeh.yaml
index e7a913a618..b8ec299059 100644
--- a/plots/learning-curve-basic/metadata/bokeh.yaml
+++ b/plots/learning-curve-basic/metadata/bokeh.yaml
@@ -25,3 +25,174 @@ review:
   - Legend positioned in bottom-right corner could be moved closer to the curves (e.g.,
     top_left inside plot area would be near the training curve)
   - Text sizes, while adequate, are on the lower end for the 4800x2700 canvas
+  image_description: The plot displays a model learning curve with two distinct lines
+    on a light gray background. The **blue line** (Training Score) remains consistently
+    high at approximately 0.97-0.98 across all training set sizes, with a narrow light
+    blue confidence band. The **orange/yellow line** (Validation Score) starts at
+    around 0.68 for small training sets and curves upward asymptotically, reaching
+    approximately 0.89 at 900 samples, with a wider yellow confidence band that narrows
+    as sample size increases. The title "learning-curve-basic · bokeh · pyplots.ai"
+    appears at the top left. The x-axis is labeled "Training Set Size (samples)" (range
+    50-900) and the y-axis is labeled "Accuracy Score" (range 0.6-1.0). A legend in
+    the bottom-right corner identifies both curves. The grid uses dashed lines with
+    subtle alpha.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and labels readable but could be slightly larger for optimal
+          viewing at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines and markers well-sized (size=22), bands clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and orange (#E6A800) are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good plot area utilization, legend placement in corner slightly isolated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Training Set Size (samples)", "Accuracy
+          Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3, dashed), legend functional but positioned
+          far from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=training sizes, Y=scores, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Training/validation curves with shaded confidence bands present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both curves
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "learning-curve-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows bias-variance tradeoff (high training, lower validation), convergence
+          pattern, but gap could be more pronounced initially
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible ML classification scenario with accuracy metric
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Accuracy values realistic (0.65-0.99), sample sizes reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses HoverTool for interactivity (good), Band for confidence intervals,
+          ColumnDataSource. HTML output preserved. Could leverage more Bokeh-specific
+          features.
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/highcharts.yaml b/plots/learning-curve-basic/metadata/highcharts.yaml
index ca959e8a2d..3dc498d2aa 100644
--- a/plots/learning-curve-basic/metadata/highcharts.yaml
+++ b/plots/learning-curve-basic/metadata/highcharts.yaml
@@ -26,3 +26,172 @@ review:
   - Legend does not include entries for the confidence bands (±1 std) - only shows
     the line series
   - Y-axis could start at 0.65 instead of 0.60 to reduce empty space at the bottom
+  image_description: |-
+    The plot displays a learning curve visualization on a white background. The title "learning-curve-basic · highcharts · pyplots.ai" appears at the top in bold black text, with a subtitle "Model Performance vs Training Set Size" below it. The X-axis shows "Training Set Size (samples)" ranging from 0 to 1700, and the Y-axis shows "Accuracy Score" ranging from 0.60 to 1.02. Two lines are displayed:
+    - A **blue line with square markers** representing Training Score, starting high (~0.99) and gradually decreasing to ~0.94
+    - A **yellow/gold line with triangle markers** representing Validation Score, starting low (~0.72) and increasing toward ~0.93
+    Both lines have semi-transparent shaded confidence bands showing ±1 standard deviation. The blue band is narrower (training is more consistent), while the yellow band is wider at low training sizes and narrows as training size increases. A legend in the top-right corner clearly identifies both series. Dashed grid lines provide reference.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend are all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and markers are well-sized; confidence bands visible but markers
+          could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout, but plot area could use slightly more vertical
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Training Set Size
+          (samples)" and "Accuracy Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate, but the legend lists only the line series and
+          omits the ±1 std confidence bands
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve with training/validation lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=training sizes, Y=scores, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has both curves, shaded confidence bands, clear distinction
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Training Score and Validation Score
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "learning-curve-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows classic learning curve pattern: high training/low validation
+          at small sizes, convergence at larger sizes; could show slight overfitting
+          gap more clearly'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic ML scenario showing typical model learning behavior
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Accuracy values are realistic (0.72-0.99), training sizes reasonable;
+          minor: y-axis minimum at 0.6 leaves some empty space'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, follows imports → data → plot → save pattern
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates intermediate plot_raw.png (cleaned
+          up)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of arearange for confidence bands, proper series layering
+          with zIndex, interactive HTML output also generated
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/letsplot.yaml b/plots/learning-curve-basic/metadata/letsplot.yaml
index 807b7f0afe..39ee41a035 100644
--- a/plots/learning-curve-basic/metadata/letsplot.yaml
+++ b/plots/learning-curve-basic/metadata/letsplot.yaml
@@ -23,3 +23,171 @@ review:
   - Y-axis label could include units/scale indicator (e.g., Accuracy Score 0-1)
   - Could demonstrate more lets-plot specific features like tooltips or interactive
     elements
+  image_description: |-
+    The plot shows a learning curve with two distinct lines on a light gray background. The title "learning-curve-basic · letsplot · pyplots.ai" appears at the top. The x-axis displays "Training Set Size (samples)" ranging from 0 to 1,600, and the y-axis shows "Accuracy Score" ranging from 0.55 to 1.0. Two curves are visible:
+    - A **blue line** (Training Score) starting around 0.88 and quickly rising to plateau near 0.99
+    - A **yellow/gold line** (Validation Score) starting around 0.67 and gradually rising to about 0.84
+    Both curves have shaded confidence bands showing standard deviation. The legend is positioned at the bottom center, clearly labeling "Training Score" and "Validation Score". The plot demonstrates a classic overfitting pattern with a gap between training and validation scores.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and points are clearly visible; markers slightly small but
+          acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow/gold are colorblind-safe and high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis says "Accuracy Score" but lacks units (could be "Accuracy
+          Score (0-1)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle gray grid, legend well positioned at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve with two lines and confidence bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Training sizes on X, scores on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has both training/validation curves, shaded confidence bands, proper
+          legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Training Score and Validation Score
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overfitting pattern with gap between training and validation;
+          demonstrates improvement with more data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: ML model evaluation is a perfect realistic scenario for learning
+          curves
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Score values 0.65-0.99 are realistic; training sizes 50-1600 are
+          reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current lets_plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar correctly with geom_ribbon for confidence bands,
+          manual color scales, and theme customization; could leverage more interactive
+          features
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/matplotlib.yaml b/plots/learning-curve-basic/metadata/matplotlib.yaml
index e279f63d86..2ab18623cf 100644
--- a/plots/learning-curve-basic/metadata/matplotlib.yaml
+++ b/plots/learning-curve-basic/metadata/matplotlib.yaml
@@ -24,3 +24,179 @@ review:
   - Y-axis label could include units or clarification that it is a dimensionless score
   - Could add annotations to highlight the overfitting gap or key insights about the
     learning curve
+  image_description: 'The plot displays a learning curve with two lines on a white
+    background. The training score is shown in blue (Python logo blue, #306998) with
+    circular markers and a light blue confidence band. The validation score is shown
+    in yellow/gold (#FFD43B) with square markers and a corresponding light yellow
+    confidence band. The x-axis shows "Training Set Size (samples)" ranging from 0
+    to 1600, and the y-axis shows "Accuracy Score" ranging from approximately 0.55
+    to 1.0. The title "learning-curve-basic · matplotlib · pyplots.ai" appears at
+    the top. A legend in the lower right clearly identifies both curves. The training
+    score starts around 0.88 and quickly rises to ~0.99, while the validation score
+    starts around 0.67 and gradually increases to ~0.85, showing a classic overfitting
+    pattern with a persistent gap between training and validation performance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers at size 10 with linewidth 3 are appropriate for 10 data points;
+          confidence bands clearly visible with alpha=0.2
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe and provide excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills the canvas well with balanced margins using tight_layout()
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Training Set Size (samples)", "Accuracy
+          Score") but y-axis could specify accuracy is dimensionless
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), legend well-placed in lower right;
+          grid could be slightly more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve with two lines and confidence bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows training set sizes, Y-axis shows scores
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: two curves, shaded confidence bands,
+          legend, proper labeling'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis from 0.55 to 1.02 shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly distinguishes "Training Score" from "Validation
+          Score"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "learning-curve-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows classic overfitting pattern with gap between training/validation;
+          however, could demonstrate the convergence scenario or underfitting as well
+          to fully showcase learning curve diagnostics
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible ML training scenario with accuracy scores; the training
+          score reaching 0.99 is realistic for a slightly overfit model
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Training sizes from 50-1600 samples and accuracy scores 0.65-0.99
+          are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses standard matplotlib features (fill_between, plot with markers)
+          competently but doesn't leverage more advanced features like annotations,
+          secondary axes, or custom styling
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/plotly.yaml b/plots/learning-curve-basic/metadata/plotly.yaml
index 76c80f94f1..bffb7feb00 100644
--- a/plots/learning-curve-basic/metadata/plotly.yaml
+++ b/plots/learning-curve-basic/metadata/plotly.yaml
@@ -24,3 +24,182 @@ review:
     statistics
   - Data shows a relatively mild overfitting scenario; more dramatic gap could better
     illustrate the concept
+  image_description: 'The plot displays a model learning curve with two distinct lines
+    on a white background. The **Training Score** line (dark blue, #306998) starts
+    at approximately 0.98 and gradually decreases to around 0.92 as training set size
+    increases. The **Validation Score** line (golden yellow, #FFD43B) starts at approximately
+    0.73 and improves to around 0.86, showing the characteristic learning curve convergence
+    pattern. Both lines have semi-transparent shaded regions representing ±1 standard
+    deviation confidence bands. The x-axis shows "Training Set Size" from 0 to 3000,
+    and the y-axis shows "Accuracy Score" from 0.65 to 1.0. The title "learning-curve-basic
+    · plotly · pyplots.ai" is centered at the top. A legend in the lower right corner
+    clearly identifies both curves. Markers are visible at each data point (10 sizes
+    total).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title (32pt), axis labels (24pt), tick labels (18pt) - all clearly
+          readable at high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Lines and markers well-sized (width=4, markers=14), confidence bands
+          clearly visible. Minor: markers slightly large for the density'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, plot fills canvas well, balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but lack units ("Accuracy Score" could specify
+          range 0-1)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.1), legend well placed but could have slightly
+          better styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve with training/validation lines and confidence
+          bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=training set sizes, Y=scores correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has shaded confidence bands (±1 std), distinct colors, legend, appropriate
+          axes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [0.65, 1.02] shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Training Score" and "Validation Score"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "learning-curve-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows classic learning curve pattern: high training score decreasing
+          slightly, validation improving and converging. Shows the gap (variance indicator).
+          Minor: could show more extreme overfitting scenario'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Accuracy scores (0.73-0.98) are realistic for ML models; training
+          sizes (50-3000) are practical
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are sensible; training scores slightly high at 0.98 but acceptable
+          for simulated data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → plotting → save. No functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" and "plot.html"
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Figure and go.Scatter correctly, creates shaded bands using
+          fill="toself", but doesn't leverage Plotly-specific interactive features
+          like hover templates or annotations that would enhance the learning curve
+          visualization
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/plotnine.yaml b/plots/learning-curve-basic/metadata/plotnine.yaml
index 7d047a6d9d..d0ba0375d3 100644
--- a/plots/learning-curve-basic/metadata/plotnine.yaml
+++ b/plots/learning-curve-basic/metadata/plotnine.yaml
@@ -23,3 +23,177 @@ review:
   weaknesses:
   - Missing grid lines which would help read exact values from the plot
   - Y-axis label could include units or range indicator
+  image_description: 'The plot shows a learning curve with two lines and confidence
+    bands. The training score (blue, #306998) starts at ~0.88 and quickly rises to
+    ~0.99, remaining stable. The validation score (yellow/gold, #FFD43B) starts at
+    ~0.70 and gradually improves to ~0.89. Both lines have shaded confidence bands
+    showing variability. The x-axis shows "Training Set Size" from approximately 50
+    to 800, and the y-axis shows "Accuracy Score" ranging from 0.6 to 1.0. The title
+    follows the correct format: "learning-curve-basic · plotnine · pyplots.ai". The
+    legend is positioned in the lower right area of the plot, clearly distinguishing
+    "Training Score" and "Validation Score". The overall layout uses a clean minimal
+    theme with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title is prominent, axis labels are
+          well-sized, tick labels are legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (size=2), confidence bands have appropriate alpha
+          (0.25)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow/gold are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (Accuracy Score could be "Accuracy
+          Score (0-1)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines (spec mentions grid could be useful for reading
+          values)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve with training and validation lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=training set size, Y=accuracy score, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shaded confidence bands, two distinct curves, clear legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels "Training Score" and "Validation Score" are accurate
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: true
+        comment: 'Title format is correct: "learning-curve-basic · plotnine · pyplots.ai"
+          ✓ (2/2)'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows typical ML learning pattern: high training score, improving
+          validation, gap between them (variance). Could show more pronounced initial
+          gap for clearer bias-variance illustration'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic ML classification scenario with accuracy scores
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (0.7-0.99 accuracy), though starting validation
+          at 0.70 is somewhat high for small training sets
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of ggplot grammar: geom_ribbon for confidence bands, geom_line,
+          scale_color_manual, theme customization. Could use faceting or other advanced
+          features for bonus points'
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/pygal.yaml b/plots/learning-curve-basic/metadata/pygal.yaml
index 15a80ad218..13854907ef 100644
--- a/plots/learning-curve-basic/metadata/pygal.yaml
+++ b/plots/learning-curve-basic/metadata/pygal.yaml
@@ -23,3 +23,179 @@ review:
   - Legend at bottom appears slightly clipped/truncated, reducing readability
   - Lower portion of the plot (y < 0.7) is mostly empty white space
   - Does not fully leverage pygal interactive SVG features (tooltips, hover effects)
+  image_description: The plot displays a learning curve with two main lines on a white
+    background. The **blue line** represents the "Training Score (±1σ band)" staying
+    consistently high around 0.93-0.95 across all training set sizes. The **yellow/gold
+    line** represents the "Validation Score (±1σ band)" starting lower at ~0.71 for
+    100 samples and improving to ~0.87 at 1000 samples. Both curves have dashed confidence
+    bands (±1 standard deviation) shown as thinner dashed lines. The X-axis shows
+    "Training Set Size (samples)" from 100 to 1000. The Y-axis shows "Accuracy Score"
+    from 0.5 to 1.0. The title correctly follows the format "learning-curve-basic
+    · pygal · pyplots.ai". Legend is placed at the bottom with two columns. Dot markers
+    are visible on the main curves. The gap between training and validation curves
+    clearly demonstrates the variance/overfitting pattern typical of learning curves.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: title, axis labels, and tick labels are all readable; font sizes
+          scaled appropriately for 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: markers visible, lines clear; dots could be slightly larger for emphasis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue and yellow/gold are distinguishable even for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good proportions but lower half of plot area is unused due to y-range
+          starting at 0.5
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'descriptive labels with units: "Training Set Size (samples)" and
+          "Accuracy Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: horizontal grid only, subtle; but the **legend appears clipped/truncated
+          at the bottom edge** making it difficult to read fully
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct XY line chart for learning curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows training set sizes, Y shows accuracy scores
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: has training/validation curves, confidence bands (±1σ dashed lines),
+          legend distinguishing curves
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible, appropriate y-range (0.5-1.0)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend labels accurately describe the series with "(±1σ band)" notation
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correct format "learning-curve-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows typical learning curve pattern with gap closing as training
+          size increases; could show a bit more variance in early stages
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: simulates sklearn learning_curve output with realistic accuracy values
+          for a classification task
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: accuracy values are realistic (0.7-0.95); the y-axis range (0.5-1.0)
+          leaves unused space
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'flat script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only numpy, pygal, and Style imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: using current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: saves both plot.html and plot.png but the spec only requires plot.png;
+          this is acceptable but redundant
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses pygal XY chart, custom Style, stroke_style for line width, dashed
+          lines for confidence bands. Does not leverage interactive tooltips or SVG-specific
+          features more fully.
+  verdict: APPROVED
diff --git a/plots/learning-curve-basic/metadata/seaborn.yaml b/plots/learning-curve-basic/metadata/seaborn.yaml
index d7940d1857..080472a110 100644
--- a/plots/learning-curve-basic/metadata/seaborn.yaml
+++ b/plots/learning-curve-basic/metadata/seaborn.yaml
@@ -27,3 +27,176 @@ review:
     would be clearer)
   - fill_between is used directly from matplotlib rather than a seaborn-native approach
   - Data scenario is generic rather than depicting a specific real-world ML application
+  image_description: The plot displays a learning curve with two lines and shaded
+    confidence bands on a white/gray grid background. The training score line (blue
+    with circle markers) starts at approximately 0.98 and gradually decreases to around
+    0.95. The validation score line (yellow/gold with square markers) starts at approximately
+    0.67 and increases asymptotically toward 0.90. Both curves have semi-transparent
+    shaded regions representing the standard deviation bands (±1 std). The x-axis
+    shows "Training Set Size" ranging from 0 to 2000, and the y-axis shows "Accuracy
+    Score" ranging from 0.5 to 1.0. The title reads "learning-curve-basic · seaborn
+    · pyplots.ai" and a legend in the lower right distinguishes Training Score from
+    Validation Score.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3, marker size of 10, shaded bands all clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and highly
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units ("Accuracy Score" could include
+          "(0-1)" or similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend is well-placed in lower right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct learning curve with two lines and confidence bands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=training set size, Y=scores - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has both training and validation curves with shaded confidence bands
+          as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0.5-1.0, encompassing all data with appropriate context
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Training Score" and "Validation Score"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows classic learning curve pattern: high training score that decreases
+          slightly, low validation score that increases, narrowing gap'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Plausible ML scenario but generic (no specific model/dataset context
+          mentioned in labels)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Accuracy values are realistic (0.67-0.98), training sizes are sensible
+          (50-2000)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot and sns.set_context/set_style, but fill_between
+          comes from matplotlib. Could leverage seaborn-specific features such as FacetGrid
+          or relplot for a more seaborn-native approach
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/altair.yaml b/plots/lift-curve/metadata/altair.yaml
index 7a2760f814..0bff4ef571 100644
--- a/plots/lift-curve/metadata/altair.yaml
+++ b/plots/lift-curve/metadata/altair.yaml
@@ -25,3 +25,176 @@ review:
   - Could add a small legend to label the lift curve line
   - Annotation text placement could be slightly adjusted to avoid visual proximity
     to the reference line
+  image_description: The plot displays a lift curve showing cumulative lift on the
+    Y-axis (ranging from 0 to 5.0) against population targeted percentage on the X-axis
+    (0-100%). The main lift curve is a solid blue line (#306998) that starts high
+    at approximately 5.0 (capped by the Y-axis scale) at the far left, then gradually
+    decreases in a smooth curve as the population percentage increases, eventually
+    approaching 1.0 at 100%. Blue filled circular markers are placed at decile points
+    (10%, 20%, 30%, etc.) along the curve. A gray dashed horizontal reference line
+    at y=1 represents random selection, with the annotation "Random Selection (Lift
+    = 1)" placed near it. The title "lift-curve · altair · pyplots.ai" is displayed
+    at the top center. Axis labels are clearly readable with "Population Targeted
+    (%)" on the X-axis and "Cumulative Lift" on the Y-axis. The background has subtle
+    grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width is appropriate (strokeWidth=4), decile markers are clearly
+          visible (size=200)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue (#306998) and gray, colorblind-safe combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units (percentage sign is part of label
+          text, not a unit)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle and good (alpha=0.3), but annotation text slightly
+          overlaps the reference line area
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative lift, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Reference line at y=1, decile markers, smooth curve, all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-100%, Y-axis shows full lift range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (not strictly needed but could label the lift curve)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "lift-curve · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows high lift at low percentages, gradual decline, approaches 1
+          at 100% - demonstrates all key aspects
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer churn prediction scenario with 20% churn rate is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Lift values range from ~5x at top to 1x at bottom, realistic for
+          a good model
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses layered chart composition and tooltips, but could leverage more
+          Altair features like selections or interactive highlighting
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/bokeh.yaml b/plots/lift-curve/metadata/bokeh.yaml
index 6b4ff148b9..c6a2fabe5c 100644
--- a/plots/lift-curve/metadata/bokeh.yaml
+++ b/plots/lift-curve/metadata/bokeh.yaml
@@ -26,3 +26,177 @@ review:
     at each percentage
   - Y-axis label could be more descriptive (e.g., Cumulative Lift Model/Random)
   - Legend text slightly small relative to other text elements on the large canvas
+  image_description: The plot displays a lift curve visualization on a light gray
+    background (#fafafa). A blue line (#306998) represents the model lift, starting
+    at approximately 5.2 at 1% population, peaking around 5.7 at 3-5%, then smoothly
+    declining toward 1.0 as population approaches 100%. A horizontal dashed gray line
+    at y=1 represents the random selection baseline. Yellow circular markers with
+    blue outlines are placed at each decile (10%, 20%, ..., 100%) along the curve.
+    The title "lift-curve · bokeh · pyplots.ai" appears in bold at the top-left. The
+    x-axis is labeled "Population Targeted (%)" ranging from 0-105, and the y-axis
+    shows "Cumulative Lift Ratio" from 0 to ~6.5. A legend in the top-right corner
+    identifies "Model Lift" (line) and "Decile Markers" (circles). Grid lines are
+    subtle with dashed styling and low alpha.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 5 is excellent, decile markers size 20 with contrasting
+          colors are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/gray palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, minor extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels; the x-axis includes its unit (%), but the y-axis
+          could specify "ratio" more clearly
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend well-placed but could be slightly
+          larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative lift ratio correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes baseline reference line at y=1, decile markers as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows 0-100% population, appropriate y-range with padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Model Lift and Decile Markers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: lift-curve · bokeh · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows typical lift curve behavior: high lift at low percentages,
+          gradual decline to baseline. Could show more dramatic initial peak'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Marketing campaign customer response data is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 15% baseline response rate and lift values 1-5.7 are realistic, though
+          lift could vary more at early percentiles
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and Span which are Bokeh features, but doesn't
+          leverage interactive capabilities like HoverTool which would be valuable
+          for showing exact lift values at each point
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/highcharts.yaml b/plots/lift-curve/metadata/highcharts.yaml
index 88a9a5a8db..4ff2afd0b0 100644
--- a/plots/lift-curve/metadata/highcharts.yaml
+++ b/plots/lift-curve/metadata/highcharts.yaml
@@ -25,3 +25,181 @@ review:
   - Lift values at 10%, 20%, and 30% are identical (1.52x), which looks artificial;
     the data generation could show more variation
   - Code complexity with regex extraction of JS literal could be simplified
+  image_description: 'The plot displays a lift curve for a customer response model.
+    It shows a blue line ("Model Lift") starting at approximately 1.52x lift at the
+    top left, gradually decreasing as the population targeted percentage increases
+    from 0% to 100%. The curve smoothly approaches the horizontal dashed reference
+    line at y=1 (labeled "Random Selection (Lift = 1)") near 100% population. Four
+    yellow diamond markers highlight key percentiles at 10%, 20%, 30%, and 50% with
+    lift values displayed (1.52x, 1.52x, 1.52x, and 1.46x respectively). The title
+    "lift-curve · highcharts · pyplots.ai" appears at the top with a subtitle showing
+    "Customer Response Model | Baseline Rate: 65.7%". The Y-axis is labeled "Cumulative
+    Lift" (ranging 0.9-1.6) and X-axis shows "Population Targeted (%)" (0-100). A
+    legend in the top-right identifies the two series. Colors are Python blue (#306998)
+    for the line and Python yellow (#FFD43B) for the diamond markers.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at large font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and markers are clearly visible; markers could be slightly larger
+          but are adequate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but slight asymmetry; "Random Selection" label partially
+          cut off at right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive and the X-axis shows % appropriately; the Y-axis
+          has no unit ("Cumulative Lift" is unitless, so this is technically correct)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend placement is good but could be positioned
+          better to avoid isolation in corner
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows population %, Y-axis shows cumulative lift ratio correctly
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has reference line at y=1, shows lift curve, has decile markers;
+          could show more percentile annotations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100% range displayed with appropriate Y-axis scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Model Lift" and "Key Percentiles"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lift-curve · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows lift curve characteristics well: high lift at low percentages,
+          gradual decline to 1; the flat section at top (10-30%) shows same lift values
+          which is slightly artificial'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Customer response model is realistic; baseline rate of 65.7% is quite
+          high for typical marketing scenarios but plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Lift values (1.0-1.52x) are realistic for a moderately good model
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally follows imports→data→plot→save pattern but has some complexity
+          with regex extraction of JS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Highcharts features: plotLines for reference, dataLabels
+          for annotations, scatter series overlay, proper LineSeries/ScatterSeries
+          combination'
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/letsplot.yaml b/plots/lift-curve/metadata/letsplot.yaml
index e974c0ea71..2451e8c0eb 100644
--- a/plots/lift-curve/metadata/letsplot.yaml
+++ b/plots/lift-curve/metadata/letsplot.yaml
@@ -22,4 +22,161 @@ review:
   - Decile markers (points at 10% intervals) aid interpretation per spec recommendation
   - 'Proper lets-plot idioms: ggsize for dimensions, scale=3 for 4800x2700 output,
     theme customization'
-  weaknesses: []
+  weaknesses:
+  - None significant - implementation is publication quality
+  image_description: The plot displays a lift curve with a blue line (#306998) starting
+    at approximately lift value 5 at 0% population, gradually decreasing to lift value
+    1 at 100% population. The x-axis shows "Population Targeted (%)" from 0-100 in
+    increments of 10. The y-axis shows "Cumulative Lift" with values 1-5. A horizontal
+    dashed gray reference line at y=1 represents random selection, annotated with
+    "Random (Lift = 1)" near the right side. Blue dots mark decile points along the
+    curve. The title "lift-curve · letsplot · pyplots.ai" appears at top left. Minimal
+    theme with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable at proper sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: line and points well-sized for data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/gray colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: descriptive but lift is dimensionless
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct lift curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population %, Y=lift ratio
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: reference line, decile markers, proper curve shape
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: full range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: annotation correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'exact format: lift-curve · letsplot · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows full lift curve behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: customer response model with 20% rate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: ~5x lift at top decile is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/matplotlib.yaml b/plots/lift-curve/metadata/matplotlib.yaml
index 9b67dc5a35..1836fff285 100644
--- a/plots/lift-curve/metadata/matplotlib.yaml
+++ b/plots/lift-curve/metadata/matplotlib.yaml
@@ -25,3 +25,180 @@ review:
   weaknesses:
   - Y-axis label could include units or clarification (e.g., Cumulative Lift (ratio))
   - Does not leverage distinctive matplotlib features beyond basic plotting capabilities
+  image_description: The plot displays a lift curve with a solid blue line showing
+    the model lift decreasing from approximately 6.5x at 0% population down to 1x
+    at 100% population. A horizontal yellow dashed reference line at y=1 represents
+    random selection. Key decile markers are shown at 10%, 20%, 30%, 40%, and 50%
+    with their lift values annotated (6.14x, 4.28x, 3.14x, 2.45x, 2.00x respectively).
+    The area between the lift curve and the reference line is filled with a light
+    blue shade for visual emphasis. The title reads "lift-curve · matplotlib · pyplots.ai",
+    x-axis is labeled "Population Targeted (%)", and y-axis is labeled "Cumulative
+    Lift". A legend in the upper right identifies the blue line as "Model Lift" and
+    yellow dashed line as "Random (Lift = 1)". The plot uses a subtle gray grid with
+    dashed lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; annotations are well-spaced above markers
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3, marker size of 150, appropriate for line chart with
+          clear visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and high
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, plot fills appropriate area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but y-axis could include units (e.g., "Cumulative
+          Lift (ratio)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (alpha=0.3), legend well-placed; minor: legend could
+          be slightly smaller or positioned to avoid crowding top-right'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows percentage of population, Y-axis shows cumulative lift
+          ratio
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes reference line at y=1 as specified, decile markers at key
+          percentiles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-100%, Y-axis shows full lift range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels both curves
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "lift-curve · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows characteristic lift curve behavior: high lift at low percentages,
+          gradual decay to 1'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model with 15% baseline rate is realistic marketing
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 1000 samples, 15% base rate, lift values (6.5x to 1x) are realistic
+          for a good model
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API (Axes methods)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic matplotlib features only; could leverage fill_between
+          more creatively or add interactive annotations
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/plotly.yaml b/plots/lift-curve/metadata/plotly.yaml
index 6edb178762..a0e6a8e032 100644
--- a/plots/lift-curve/metadata/plotly.yaml
+++ b/plots/lift-curve/metadata/plotly.yaml
@@ -23,3 +23,175 @@ review:
   - Grid lines are very subtle (alpha 0.1) - could be slightly more visible for better
     readability
   - Marker density in early percentages could be reduced for cleaner visualization
+  image_description: 'The plot displays a lift curve with a blue line ("Model Lift")
+    showing cumulative lift ratio on the Y-axis (ranging from 0 to ~2) against percentage
+    of population targeted on the X-axis (0-100%). A dashed yellow/gold horizontal
+    line at y=1 represents the "Random Selection" baseline. The lift curve starts
+    high (~1.8x) at low percentages and gradually decreases toward 1 as the percentage
+    approaches 100%. An annotation box with an arrow points to the 10% mark, showing
+    "Top 10%: 1.6x lift". The title "lift-curve · plotly · pyplots.ai" is centered
+    at the top. The legend is positioned in the upper right corner. The background
+    is clean white with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, annotation well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width 4 and marker size 10 are appropriate; markers slightly
+          dense in lower percentages
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight extra whitespace on left margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Percentage of Population Targeted (%)" and "Cumulative Lift Ratio"
+          are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend well-placed; however grid could
+          be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows percentage targeted, Y-axis shows cumulative lift ratio
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes baseline reference line at y=1, annotation at key percentile
+          (10%)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-100%, Y-axis shows full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Model Lift" and "Random Selection" labels are accurate'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "lift-curve · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows lift starting high and approaching 1; curve demonstrates model
+          discrimination well but could show more dramatic lift for better demonstration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response model predictions - plausible marketing/ML scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Lift values 1.0-1.8x are realistic; starting lift could be slightly
+          higher for more impressive demonstration
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly, but also saves HTML (not required, minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter, hover templates with formatting, annotation with
+          arrow styling, plotly_white template; could leverage more interactive features
+          like rangeslider or buttons
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/plotnine.yaml b/plots/lift-curve/metadata/plotnine.yaml
index 35a9ccab3c..eef04bd509 100644
--- a/plots/lift-curve/metadata/plotnine.yaml
+++ b/plots/lift-curve/metadata/plotnine.yaml
@@ -22,3 +22,174 @@ review:
   weaknesses:
   - Y-axis label could include (ratio) for clarity since lift is a unitless ratio
   - Could add text annotations at key decile points showing actual lift values
+  image_description: The plot displays a lift curve with a solid blue line (#306998)
+    showing cumulative lift values on the Y-axis ranging from 0 to approximately 2.5,
+    plotted against population targeted percentage (0-100%) on the X-axis. Yellow-filled
+    circular markers with blue outlines appear at each decile point (10%, 20%, 30%,
+    etc.). A gray dashed horizontal reference line at y=1.0 represents random selection
+    (no lift). The curve starts high at approximately 2.5 when targeting the top 1-2%
+    of the population, then gradually descends and approaches 1.0 as the percentage
+    increases to 100%. The title "lift-curve · plotnine · pyplots.ai" is displayed
+    at the top. The layout uses a minimal theme with clean white background and subtle
+    grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line thickness of 2.5 and point size of 5 are appropriate, decile
+          markers clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but Y-axis lacks units (ratio is unitless but
+          could note it)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed for this single-series plot, grid is subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative lift - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Reference line at y=1, decile markers, curve showing lift behavior
+          - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-100%, Y-axis shows full range with room
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: lift-curve · plotnine · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows lift curve behavior well with high initial lift descending
+          to 1.0; slight variation in early curve shows realistic model behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Customer response model scenario is realistic and well-documented
+          in code comments
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Lift values of 1.5-2.5x are realistic for a good predictive model
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but verbose=False is good practice
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of ggplot grammar (geom_line, geom_point, geom_hline, theme_minimal,
+          scale functions), but could use more advanced features like annotations
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/pygal.yaml b/plots/lift-curve/metadata/pygal.yaml
index 0be9f36927..74a728b77e 100644
--- a/plots/lift-curve/metadata/pygal.yaml
+++ b/plots/lift-curve/metadata/pygal.yaml
@@ -25,3 +25,184 @@ review:
   - Legend font appears relatively small compared to axis labels
   - Missing value annotations at key decile points as suggested in spec
   - Does not fully leverage pygal interactive tooltip features
+  image_description: The plot displays a lift curve visualization on a white background.
+    A blue line labeled "Model Lift" starts at approximately 1.68 lift at the 10%
+    population mark and gradually decreases, approaching the baseline of 1.0 as it
+    reaches 100% of the population. Each decile point is marked with a visible dot.
+    A yellow horizontal line at y=1 represents "Random (No Lift)" as the baseline
+    reference. The title "lift-curve · pygal · pyplots.ai" appears at the top. The
+    X-axis is labeled "Population Targeted (%)" with decile markers (10% through 100%),
+    and the Y-axis is labeled "Lift (Model Rate / Baseline Rate)" with a range from
+    0.9 to 2.2. The legend is positioned at the bottom with two entries. The overall
+    layout is clean with good use of canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, axis labels, and tick marks are clear.
+          Font sizes are appropriately scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and tick marks are fully
+          readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and dots are visible; dots_size=8 and stroke_width=6 work well.
+          Could be slightly more prominent.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization; plot fills appropriate portion of the space
+          with balanced margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive axis labels with context: "Population Targeted (%)"
+          and "Lift (Model Rate / Baseline Rate)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is visible but legend placement at bottom creates some wasted
+          space; legend font appears small relative to the chart.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart showing lift curve as specified.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows population percentage, Y-axis shows lift ratio correctly.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Includes baseline reference line at y=1 as required. Missing decile
+          value annotations at key points (spec suggests "Consider showing decile
+          markers or actual values at key percentiles").
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (0.9-2.2) appropriately shows all data points.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels "Model Lift" and "Random (No Lift)" are accurate and
+          descriptive.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title uses correct format "{spec-id} · {library} · pyplots.ai" but
+          title font could be more prominent.
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows the key lift curve behavior: high lift at low percentages
+          gradually decreasing to baseline. Could demonstrate more dramatic lift difference
+          in early deciles.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer response prediction is an excellent, realistic marketing
+          use case that matches the spec applications.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Lift values (1.68 down to 1.0) are realistic for a well-performing
+          marketing model.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pygal, Style.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization and Line chart with show_dots, but
+          could leverage more pygal-specific features like tooltips or value labels.
+  verdict: APPROVED
diff --git a/plots/lift-curve/metadata/seaborn.yaml b/plots/lift-curve/metadata/seaborn.yaml
index 111523892c..6b6b0a93bc 100644
--- a/plots/lift-curve/metadata/seaborn.yaml
+++ b/plots/lift-curve/metadata/seaborn.yaml
@@ -26,3 +26,176 @@ review:
     could be done entirely in matplotlib
   - First decile annotation appears slightly cramped near the top of the curve
   - Y-axis label could clarify that lift is a ratio
+  image_description: The plot displays a lift curve chart with a 16:9 landscape format
+    on a white background with a subtle gray grid. The main lift curve is rendered
+    as a thick blue line (#306998 Python Blue) starting at approximately 10x lift
+    on the left and smoothly decreasing to approach 1x at 100% population. A dashed
+    yellow/gold horizontal reference line at y=1 represents random selection (no lift).
+    Five decile markers are shown as blue circles at the 10%, 20%, 30%, 40%, and 50%
+    population points, with bold blue annotations showing lift values (8.30x, 4.95x,
+    3.33x, 2.50x, 2.00x). The title "lift-curve · seaborn · pyplots.ai" is prominently
+    displayed at the top in bold. X-axis shows "Population Targeted (%)" from 0-100,
+    Y-axis shows "Cumulative Lift" from 0-11. A legend in the upper right identifies
+    "Model Lift" (solid blue) and "Random (No Lift)" (dashed yellow).
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 24pt bold, axis labels at
+          20pt, tick labels at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; decile annotations are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is appropriate, markers at size 12 are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe combination with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but Y-axis lacks units (ratio is unitless, but
+          could use "x" suffix)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3 is subtle; legend well-placed but could be slightly
+          smaller
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lift curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=population percentage, Y=cumulative lift ratio correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes reference line at y=1, decile markers with values, proper
+          curve shape
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-100%, Y-axis shows full lift range with appropriate headroom
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "lift-curve · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows high initial lift (~10x), gradual decay, convergence to baseline
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Marketing campaign response prediction is plausible; 10% base rate
+          is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 1000 samples, 10% response rate, lift values 1-10x are all realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Only uses sns.lineplot and sns.set_theme; the actual lift curve calculation
+          and most visualization is done with matplotlib (ax.axhline, ax.plot, ax.annotate).
+          Seaborn is primarily used for theming rather than its distinctive statistical
+          visualization features.
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/altair.yaml b/plots/line-annotated-events/metadata/altair.yaml
index ba0d2f019d..8ae64276c8 100644
--- a/plots/line-annotated-events/metadata/altair.yaml
+++ b/plots/line-annotated-events/metadata/altair.yaml
@@ -26,3 +26,175 @@ review:
     chart height
   - No tooltips on event markers for interactivity (missed opportunity for Altair's
     strength)
+  image_description: 'The plot displays a stock price time series spanning January
+    2024 to December 2024, with prices ranging from approximately $120 to $200. A
+    blue line (#306998) traces the stock price trajectory showing typical market volatility.
+    Six vertical dashed yellow lines (#FFD43B) mark key events: Q4 Earnings (Feb),
+    Q1 Earnings (late Apr), Product Launch (Jun), Q2 Earnings (late Jul), Analyst
+    Upgrade (Sep), and Q3 Earnings (Nov). Each event has a yellow circular marker
+    with blue outline positioned at alternating heights above the data, with bold
+    text labels. The title "line-annotated-events · altair · pyplots.ai" appears centered
+    at the top. Axis labels show "Date" on x-axis and "Stock Price ($)" on y-axis
+    with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title ~28pt, axis labels ~22pt, tick
+          labels ~18pt, event labels ~16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Event labels at alternating heights prevent overlap, no text collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line strokeWidth=3 appropriate, markers size=300 visible, dashed
+          rules clear (-1 for rules extending beyond visible y-range)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, but slight imbalance with event markers creating
+          extra space at top (-1)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Stock Price ($)" and "Date"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), but no legend for event types (-2)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: line plot with event annotations'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, price on Y, events mapped correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vertical lines, markers, and text labels all present as per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, scale domain properly set
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present for event types (spec suggests "subtle legend or
+          key if multiple event types shown") (-1)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-annotated-events · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple event types (earnings, product launch, analyst upgrade),
+          varied timing (-1 for not showing events during both uptrends and downtrends
+          explicitly)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price with quarterly earnings and company milestones is very
+          realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock prices ~$120-200 with realistic daily volatility (~1.5% returns)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using y=alt.value(0), y2=alt.value(900) for rules is hacky (-1)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered chart composition and encoding types. Saved interactive
+          HTML. Could improve by using Altair's tooltip for hover info on events (-2)
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/bokeh.yaml b/plots/line-annotated-events/metadata/bokeh.yaml
index e2a8a23b0a..6360208f86 100644
--- a/plots/line-annotated-events/metadata/bokeh.yaml
+++ b/plots/line-annotated-events/metadata/bokeh.yaml
@@ -25,3 +25,180 @@ review:
     canvas size
   - Legend is positioned far from the data in bottom-right corner
   - HTML output could leverage Bokeh hover tools for enhanced interactivity
+  image_description: 'The plot displays a time series line chart showing "Active Users
+    (thousands)" over the course of 2024 (January 2024 to January 2025). The main
+    data line is blue (#306998) with a line width of 4, showing a clear upward trend
+    from ~100 to ~185 with realistic noise and subtle seasonal variation. Five vertical
+    dashed yellow (#FFD43B) lines mark important events at different dates: "Product
+    Launch" (Feb 15), "Feature Update" (May 1), "Server Upgrade" (Jul 20), "API v2
+    Release" (Sep 10), and "Mobile App Launch" (Nov 25). Event labels are positioned
+    at alternating heights (0.92 and 0.84) to avoid overlap. The title "line-annotated-events
+    · bokeh · pyplots.ai" appears at the top left. Axis labels show "Date" on x-axis
+    and "Active Users (thousands)" on y-axis. A legend showing "Daily Active Users"
+    is positioned in the bottom right with a semi-transparent background. The background
+    is a subtle off-white (#fafafa) with dashed grid lines at 0.3 alpha.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 22pt, tick labels 18pt - all clearly readable,
+          event labels at 16pt are slightly small but still legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; alternating event label heights prevent collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width 4 is good; vertical event lines are clearly visible with
+          dashed style
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe; good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; legend position could be better (isolated
+          in corner with significant gap from data)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Active Users (thousands)" includes units; "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), but legend is far from the main
+          data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: line plot with event annotations'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis (datetime), values on y-axis (numeric)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has vertical lines (Span), event markers, text labels, distinct colors
+          for events vs data
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Active Users" correctly describes the line'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-annotated-events · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows upward trend, noise, seasonal variation, 5 distinct events
+          with varied timing; could show events correlating with data changes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech product metrics scenario is realistic and neutral (daily active
+          users with product milestones)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: User counts in thousands (100-185k) are realistic; 365 data points
+          appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → source → figure → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using strict=True in zip is good Python, but could avoid it for broader
+          compatibility
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh-specific Span and Label models, ColumnDataSource, and
+          exports both PNG and HTML; could leverage hover tools or additional interactivity
+          in HTML output
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/highcharts.yaml b/plots/line-annotated-events/metadata/highcharts.yaml
index d1ec05f502..179adbcfc4 100644
--- a/plots/line-annotated-events/metadata/highcharts.yaml
+++ b/plots/line-annotated-events/metadata/highcharts.yaml
@@ -26,3 +26,181 @@ review:
     labels
   - Event marker scatter points could have tooltips showing event details for better
     interactivity in HTML output
+  image_description: 'The plot shows a time series line chart spanning January 2024
+    to late June 2024. A blue line (#306998) displays "Daily Active Users" data ranging
+    from approximately 10,000 to 16,200 users, showing an overall upward trend with
+    visible seasonality and noise. Five vertical dashed yellow lines (#FFD43B) mark
+    key events: "Feature A Launch" (mid-January), "Marketing Campaign" (late February),
+    "App Redesign" (late March), "Partnership Deal" (late April), and "Mobile Update"
+    (late May). Each event is labeled at the top of the chart with alternating vertical
+    positions to avoid overlap. Yellow circular markers with black outlines appear
+    on the line at each event date. The title reads "line-annotated-events · highcharts
+    · pyplots.ai" with a subtitle "Daily Active Users with Key Milestones". The x-axis
+    shows monthly dates, y-axis shows "Daily Active Users", and a legend at the bottom
+    identifies the two series.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and axis text are all clearly readable at large font
+          sizes. Event labels are slightly smaller relative to plot size but still
+          legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Event labels use alternating heights to avoid overlap, no text collisions
+          detected.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width is appropriate, event markers are clearly visible with
+          good contrast. Line could be slightly thicker for optimal visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line and yellow markers are colorblind-safe, no red-green issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, chart fills most of the area. Minor margin
+          optimization possible.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Daily Active Users" are descriptive.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines, legend is present. Legend could
+          be more prominently sized.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart with event annotations.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, values on Y-axis correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: vertical event lines, markers at event
+          points, text labels, distinct styling.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Daily Active Users" and "Key Events".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: `line-annotated-events · highcharts · pyplots.ai`'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, seasonality, noise, and multiple events. Events are
+          well-distributed across the timeline.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: User growth metrics with product milestones is a plausible, neutral
+          business scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: User counts from 10K-16K are realistic. Scale is sensible though
+          values are somewhat round.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart setup → series → export.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for deterministic output.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotLines for event markers, scatter series for event points,
+          datetime axis. Could better leverage Highcharts' native annotation features
+          or tooltips.
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/letsplot.yaml b/plots/line-annotated-events/metadata/letsplot.yaml
index eec4c2772e..ff9581396b 100644
--- a/plots/line-annotated-events/metadata/letsplot.yaml
+++ b/plots/line-annotated-events/metadata/letsplot.yaml
@@ -20,3 +20,175 @@ review:
   - Data shows realistic growth pattern with trend, seasonality, and event impacts
   weaknesses:
   - X-axis label says Day of Year 2024 but displays month names - should be consistent
+  image_description: 'The plot displays a line chart of Daily Active Users throughout
+    2024. A blue line (#306998) shows user growth from ~1,000 in January to ~7,500
+    by December, with visible trend and seasonality. The x-axis shows months (Jan-Jan),
+    and the y-axis shows "Daily Active Users" (0-7,500). Five vertical dashed red
+    lines (#DC2626) mark significant events with red diamond markers and bold black
+    labels: "Feature A" (Feb), "Feature B" (May), "Mobile App" (Jul), "API v2.0" (Oct),
+    and "Partners" (Nov). Labels are positioned at staggered heights (4800-6400) to
+    avoid overlap. The title "line-annotated-events · letsplot · pyplots.ai" appears
+    at top in red. Clean minimal theme with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick text 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Event labels at alternating heights avoid overlap completely
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 1.5 is appropriate for 365 data points, markers clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and red (#DC2626) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, no wasted space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: X-axis says "Day of Year 2024" but displays month names, which is
+          slightly confusing; Y-axis says "Daily Active Users" (no units, but users
+          don't need units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Subtle gray grid (alpha via color #CCCCCC), no legend needed'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with event annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, value (users) on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has vertical lines (geom_vline), markers (geom_point), text labels
+          (geom_text), distinct styling (dashed red vs solid blue)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-7500, capturing all data with headroom for labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; event labels serve as legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exactly "line-annotated-events · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows trend, seasonality, discrete jumps at events, multiple annotation
+          types
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product metrics with feature launches is a neutral, realistic business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are reasonable (1K-7.5K DAU), but the step increases (400-800
+          users instantly) are somewhat abrupt
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_vline, geom_point, geom_text layering,
+          scale_x_continuous with custom breaks/labels, theme_minimal with element_text
+          customization
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/matplotlib.yaml b/plots/line-annotated-events/metadata/matplotlib.yaml
index f0a76c2686..a561c30c86 100644
--- a/plots/line-annotated-events/metadata/matplotlib.yaml
+++ b/plots/line-annotated-events/metadata/matplotlib.yaml
@@ -24,3 +24,175 @@ review:
     in the legend - consider using a Line2D proxy artist instead
   - Could utilize more distinctive matplotlib features like ConnectionPatch for arrows
     or custom box styles
+  image_description: 'The plot displays a stock price time series (blue line, ~#306998)
+    spanning January 2024 to January 2025, with prices ranging from approximately
+    120 to 160 USD. Five event markers are annotated using yellow/gold (#FFD43B) dashed
+    vertical lines: "Q4 2023 Earnings" (Feb), "Q1 2024 Earnings" (May), "Product Launch"
+    (July), "Q2 2024 Earnings" (Aug), and "Q3 2024 Earnings" (Nov). Each annotation
+    has a white background box with yellow border and an arrow pointing to the event
+    date on the line. The annotations use alternating heights (higher/lower) to prevent
+    overlap. A legend in the upper left shows "Stock Price" (blue line) and "Event
+    Marker" (yellow dashed). The title reads "line-annotated-events · matplotlib ·
+    pyplots.ai". X-axis shows dates rotated for readability, Y-axis shows "Price (USD)".
+    Grid is subtle with dashed lines at alpha 0.3.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, annotations 14pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating heights prevent annotation overlap, no text collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Line width 2.5 is appropriate, annotations clearly visible. Minor:
+          arrow lines could be slightly thicker'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight empty space in bottom corners
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Price (USD)" with unit'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows "Event Marker" with alpha=0 which appears as empty in
+          legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with event annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on X, prices on Y, events properly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vertical lines (axvline), annotations with labels, alternating heights
+          for dense events
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year of data visible with all events
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Stock Price and Event Marker
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows quarterly earnings + product launch, demonstrates varied event
+          types
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price with earnings announcements is a real, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock prices 120-160 USD, realistic daily volatility (~1.5%)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: matplotlib, numpy, pandas all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses standard matplotlib features (axvline, annotate) but nothing
+          distinctive like FancyBboxPatch, custom transforms, or advanced styling
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/plotly.yaml b/plots/line-annotated-events/metadata/plotly.yaml
index fe0d6c5895..a80fc64039 100644
--- a/plots/line-annotated-events/metadata/plotly.yaml
+++ b/plots/line-annotated-events/metadata/plotly.yaml
@@ -23,3 +23,179 @@ review:
   - Grid lines are very subtle (alpha 0.1) - could be slightly more visible at 0.2-0.3
   - Does not leverage Plotly-specific interactive features like rangeslider
   - Creates both PNG and HTML outputs when only PNG is required
+  image_description: 'The plot displays a line chart of stock price data (USD) from
+    January 2024 to November 2024, with prices ranging from ~$85 to ~$115. The main
+    data line is rendered in a deep blue color (#306998) with a width of 4. Seven
+    event annotations are positioned throughout the chart using yellow (#FFD43B) vertical
+    dashed lines extending from the x-axis to label boxes at alternating heights (avoiding
+    overlap). Each event is marked on the price line itself with a yellow diamond
+    marker with a blue border. The events shown are: Q4 Earnings, Product Launch,
+    Q1 Earnings, Expansion Announced, Q2 Earnings, Partnership Deal, and Q3 Earnings.
+    The title "line-annotated-events · plotly · pyplots.ai" is centered at the top.
+    The Y-axis shows "Stock Price (USD)" with dollar sign prefix on tick labels, and
+    X-axis shows "Date". A legend in the upper left identifies the "Stock Price" trace.
+    The background is white with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 40pt, axis labels at 36pt, ticks at 28pt - all excellently
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating label heights prevent overlap, no text collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width 4 is good, diamond markers at size 18 are clear; annotations
+          well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Stock Price (USD)" and "Date" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Grid is subtle at alpha 0.1, legend well placed, but minor: gridlines
+          could be slightly more visible'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with event annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, price on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: vertical lines, markers, text labels,
+          alternating heights'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data properly with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Stock Price" trace
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows price trend with both ups and downs, multiple event types (earnings,
+          launches, deals); could show more variety in event impact
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price with quarterly earnings and corporate events is a realistic,
+          neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Stock prices $85-115 are realistic; 252 business days is accurate
+          for a year
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, plotly.graph_objects)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (minor, but only png
+          required)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Figure, go.Scatter, add_shape, add_annotation, write_image/write_html;
+          hover templates are nice but could leverage more plotly-specific features
+          like rangeslider or buttons
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/plotnine.yaml b/plots/line-annotated-events/metadata/plotnine.yaml
index 52f6aefe1f..03de1c1c3f 100644
--- a/plots/line-annotated-events/metadata/plotnine.yaml
+++ b/plots/line-annotated-events/metadata/plotnine.yaml
@@ -25,3 +25,176 @@ review:
   - Axis labels could include units (e.g., "Daily Visitors (count)")
   - Event markers at alternating heights work well but the fixed y_offset values (0.85,
     0.92) could be slightly higher to give more separation from the data line peaks
+  image_description: 'The plot displays a blue line chart showing daily website visitors
+    over the year 2024 (January through December). The y-axis ranges from approximately
+    40,000 to 55,000 daily visitors. Five vertical dashed yellow lines mark significant
+    events, each with a yellow diamond marker positioned at alternating heights (higher/lower)
+    to prevent label overlap. The event labels are: "v2.0 Release" (Feb 2024), "Mobile
+    App Launch" (May 2024), "API Update" (Jul 2024), "Enterprise Tier" (Oct 2024),
+    and "Holiday Campaign" (Dec 2024). The title follows the correct format "line-annotated-events
+    · plotnine · pyplots.ai". X-axis labels are rotated 45 degrees showing months.
+    The overall aesthetic is clean with a minimal theme and subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis labels at 20pt, tick labels at 16pt, all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Event labels use alternating heights to avoid overlap, no text collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line weight of 1.2 is appropriate for 365 daily points, event markers
+          clearly visible with size=4 diamonds
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions overall, though the plot area could utilize slightly
+          more vertical space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Daily Visitors" and "Date" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle (alpha 0.3), no legend needed for this
+          single-series plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with event annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, values on Y-axis, events correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has vertical lines (axvline), markers, text labels, dashed style
+          for events
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 365 data points and 5 events fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-annotated-events · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation, weekly patterns, multiple event types;
+          could show more dramatic impact of events on metrics
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic with product releases is an excellent, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 40k-55k daily visitors is plausible for a mid-sized website; seasonal/weekly
+          patterns realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used, no extras
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Good use of plotnine's grammar of graphics (ggplot + geom_* layering),
+          but doesn't leverage advanced features like faceting or statistical transformations
+  verdict: APPROVED
diff --git a/plots/line-annotated-events/metadata/seaborn.yaml b/plots/line-annotated-events/metadata/seaborn.yaml
index 436b356e35..023ea11efe 100644
--- a/plots/line-annotated-events/metadata/seaborn.yaml
+++ b/plots/line-annotated-events/metadata/seaborn.yaml
@@ -23,3 +23,173 @@ review:
     subtle legend or key)
   - Realistic context score could be higher with more domain-specific context (e.g.,
     actual product category mentioned in labels)
+  image_description: 'The plot displays a time series of daily sales (in units) from
+    January 2024 to January 2025. The main line is rendered in Python blue (#306998)
+    showing daily fluctuations with an upward trend from ~100 to ~180 units. Five
+    yellow vertical dashed lines mark marketing events: Valentine''s Campaign (Feb
+    14), Spring Sale (May 1), Summer Launch (Jul 15), Fall Promotion (Sep 20), and
+    Black Friday (Nov 25). Each event has a yellow background label with bold text
+    positioned at alternating heights (85%/75%) to avoid overlap. Yellow circular
+    markers with dark edges appear on the line at each event date. The title uses
+    the correct format "{spec-id} · seaborn · pyplots.ai". X-axis shows dates with
+    rotated labels, Y-axis shows "Daily Sales (Units)" with clear tick marks. A subtle
+    grid is visible in the background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Event labels use alternating heights, no text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 2.5 appropriate for daily data, markers s=150 visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line and yellow markers provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Sales (Units)" with units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle at alpha=0.3 (good), but no legend explaining event
+          marker colors
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: line plot with event annotations'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, sales on Y, events properly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vertical lines (axvline), markers on line, text labels - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis range appropriate (95-185)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, events self-labeled)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"line-annotated-events · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows trend, seasonality, noise, and 5 diverse events across the
+          year
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Product sales with marketing events is plausible, dates match real
+          calendar events
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 95-185 units/day are realistic for e-commerce
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean: imports → data → plot → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (plt, np, pd, sns)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot correctly, but annotations done via matplotlib
+          directly. Seaborn's statistical features not leveraged, though reasonable
+          given the spec.
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/altair.yaml b/plots/line-basic/metadata/altair.yaml
index 5db7d6d9ad..dbe4689e84 100644
--- a/plots/line-basic/metadata/altair.yaml
+++ b/plots/line-basic/metadata/altair.yaml
@@ -23,3 +23,174 @@ review:
   - Missing Altair distinctive interactive features (tooltips, .interactive() for
     zoom/pan) that showcase the library strengths
   - Grid lines are too subtle (gridOpacity=0.3) making them barely visible
+  image_description: 'The plot displays a basic line chart showing monthly temperature
+    data over a full year (January to December 2024). The line is blue (#306998) connecting
+    12 data points, with filled circular markers at each point. The chart has a white
+    background with subtle gray grid lines. The title "line-basic · altair · pyplots.ai"
+    appears at the top center in black text. The Y-axis shows "Temperature (°C)" ranging
+    from 0 to 34, and the X-axis shows "Month" with date labels (Jan 07, Feb 04, etc.).
+    The trend shows a clear seasonal pattern: cold temperatures (~4°C) in winter months,
+    rising through spring, peaking around 30°C in late June, then declining through
+    autumn back to cold winter temperatures (~3.5°C in December).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line strokeWidth=4 and point size=200 are well-suited for 12 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight excess whitespace at top of Y-axis (max
+          ~34 when peak is ~30)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Temperature (°C)" with units, X-axis has "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle (barely visible), could be slightly more prominent
+          for improved readability
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot connecting data points with straight lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is temporal (months), Y is numeric (temperature)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has line, optional markers, clear axis labels, grid lines as spec
+          requires
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single line (N/A - appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-basic · altair · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear trend pattern (seasonal temperature), but only 12 points
+          (spec suggests up to 200)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings is a plausible, comprehensible real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values 3-30°C are realistic for a temperate climate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic usage of Altair without leveraging its distinctive features
+          like interactivity (.interactive()), tooltips, or declarative selection
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/bokeh.yaml b/plots/line-basic/metadata/bokeh.yaml
index 8b1747484a..ba8775e02e 100644
--- a/plots/line-basic/metadata/bokeh.yaml
+++ b/plots/line-basic/metadata/bokeh.yaml
@@ -25,3 +25,176 @@ review:
   - No legend present (minor issue for single series)
   - Data pattern (warmest mid-month) is slightly artificial for typical monthly weather
     data
+  image_description: The plot displays a basic line chart showing daily temperature
+    readings over 31 days of a month. A blue line (#306998) connects all data points,
+    with small circular markers at each point featuring white borders. The title "line-basic
+    · bokeh · pyplots.ai" appears in the top-left corner. The X-axis is labeled "Day
+    of Month" ranging from 0 to ~31, and the Y-axis is labeled "Temperature (°C)"
+    ranging from approximately 19°C to 30°C. The background is a subtle light gray
+    (#fafafa) with dashed grid lines. The data shows a sinusoidal pattern with temperatures
+    warmer mid-month (peaking around day 20-21 at ~30°C) and cooler at the start and
+    end of the month (~19-21°C), with realistic daily variation/noise overlaid.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24pt - all clearly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 5 and marker size of 16 are appropriate for 31 data
+          points; markers slightly small but visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper margins, no content cut off
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Day of Month" and
+          "Temperature (°C)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed) which is good, but no legend present
+          (single series, so acceptable but not perfect)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot connecting data points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X correctly shows sequential days, Y shows temperature values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clean single line, clear axis labels, grid lines, markers on data
+          points (all spec requirements met)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 31 days visible, temperature range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend (sinusoidal seasonal pattern), variation (noise), but
+          could show more dramatic features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings for a month is a perfect realistic scenario
+          mentioned in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 19-30°C is realistic; however the sinusoidal pattern
+          peaks mid-month which is slightly unusual for typical monthly weather
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource which is good Bokeh practice, but doesn't leverage
+          Bokeh's distinctive interactive features like hover tooltips or other interactive
+          elements. For a static export, this is acceptable but not exceptional.
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/highcharts.yaml b/plots/line-basic/metadata/highcharts.yaml
index 94b217d451..7bf4905f65 100644
--- a/plots/line-basic/metadata/highcharts.yaml
+++ b/plots/line-basic/metadata/highcharts.yaml
@@ -23,3 +23,173 @@ review:
   - Legend could be hidden for single-series plots to reduce visual clutter
   - Data shows smooth seasonal curve but lacks irregular variations that would better
     demonstrate line plot flexibility
+  image_description: The plot displays a line chart showing monthly temperature data
+    across a full year (January to December). The line is rendered in a dark blue
+    color (#306998) with circular markers at each data point. The chart clearly shows
+    a seasonal temperature pattern - cold winter months (Jan ~5°C, Dec ~6°C), rising
+    through spring, peaking in summer (Jul ~29°C), and declining through autumn. The
+    title "line-basic · highcharts · pyplots.ai" appears at the top. The Y-axis is
+    labeled "Temperature (°C)" and the X-axis is labeled "Month". Subtle dashed horizontal
+    grid lines help with value reading. The background is white with clean, professional
+    styling.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width and marker size are well-adapted for 12 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with adequate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Temperature (°C)" with units, X-axis has "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but legend shows "Temperature" which
+          is somewhat redundant for a single-series plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (sequential), Y=temperature (continuous)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clean minimal design, clear axis labels, grid lines, markers on data
+          points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from ~4 to 30
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend label correctly shows "Temperature"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows clear seasonal trend with rise and fall; could show more variability
+          or edge cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings represent a realistic Northern Hemisphere
+          seasonal pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (5-29°C) for temperate climate annual temperatures
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Imports LineSeries from area module instead of line module (minor
+          issue)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Highcharts features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of chart options, series styling, and grid customization;
+          could leverage more interactive/advanced Highcharts features
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/letsplot.yaml b/plots/line-basic/metadata/letsplot.yaml
index 538803ec67..d2fd6d472f 100644
--- a/plots/line-basic/metadata/letsplot.yaml
+++ b/plots/line-basic/metadata/letsplot.yaml
@@ -24,3 +24,175 @@ review:
   - Does not leverage lets-plot specific interactive features like tooltips or hover
     effects
   - Markers (size=5) are slightly large for 12 data points; size=4 would be more proportionate
+  image_description: 'The plot displays a basic line chart showing monthly temperature
+    readings over 12 months. A blue line (#306998) with circular markers connects
+    data points from Month 1 to Month 12. The Y-axis shows "Temperature (°C)" ranging
+    from approximately 4 to 30 degrees. The data exhibits a realistic sinusoidal seasonal
+    pattern: low temperatures in winter months (~4°C in January, February, December),
+    gradually rising through spring, peaking in summer (~29°C in July), and declining
+    through fall. The background uses a minimal theme with light gray dashed grid
+    lines. The title "line-basic · letsplot · pyplots.ai" appears at the top left.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes (24pt title, 20pt labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line thickness and marker sizes are appropriate for 12 data points;
+          markers are clearly visible (-1 for markers being slightly large for this
+          data density)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe; good contrast against white
+          background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, no content cut-off, well-balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Temperature (°C)", X-axis has descriptive label
+          "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are somewhat prominent; no legend needed for single series
+          but grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (month) and Y (temperature) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clean minimal design, single line, clear axis labels, grid lines,
+          markers on data points - all spec features present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 data points visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (N/A - full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear trend/pattern over time with seasonal variation, demonstrates
+          line plot capability well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings are a realistic, comprehensible scenario
+          mentioned in spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (4-29°C) are realistic for seasonal variation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic ggplot grammar but no distinctive lets-plot specific features
+          like interactive tooltips, hover effects, or built-in animations
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/matplotlib.yaml b/plots/line-basic/metadata/matplotlib.yaml
index 9b09d559c9..de0751719e 100644
--- a/plots/line-basic/metadata/matplotlib.yaml
+++ b/plots/line-basic/metadata/matplotlib.yaml
@@ -23,3 +23,173 @@ review:
     how line plots handle noisy data
   - Could use additional matplotlib features like fill_between for confidence intervals
     or ax.annotate for peak/trough labeling to score higher on library features
+  image_description: 'The plot displays a basic line chart showing monthly temperature
+    readings throughout a year. A single blue line (#306998) connects 12 data points,
+    each marked with a yellow (#FFD43B) circular marker with a blue border. The chart
+    demonstrates a classic seasonal temperature pattern: starting low in January (~4°C),
+    rising through spring and summer to peak in July (~29°C), then declining through
+    autumn back to winter lows in December (~4°C). The title reads "line-basic · matplotlib
+    · pyplots.ai" in the correct format. X-axis shows abbreviated month names (Jan-Dec),
+    Y-axis shows "Temperature (°C)" with values ranging from approximately 5 to 30.
+    A subtle dashed gray grid aids readability. Layout is clean with good whitespace.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All month labels clearly spaced, no overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3, marker size of 10 with contrasting colors - perfectly
+          visible for 12 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line with yellow markers provides excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, appropriate margins, no cut-off content
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate at alpha=0.3, but no legend present (not required
+          for single-line plot, but could improve clarity)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot connecting data points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = months (sequential), Y = temperature (continuous) - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Single line, clear axis labels, grid lines, optional markers present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible, temperature range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single line (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"line-basic · matplotlib · pyplots.ai" matches required format exactly'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows clear trend (seasonal pattern), but data is relatively smooth;
+          could show more variability to demonstrate line plot handling of noise
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings are a real, comprehensible scenario mentioned
+          in spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range 4-29°C is realistic for temperate climate seasonal
+          variation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Axes methods, marker customization (markerfacecolor,
+          markeredgecolor, markeredgewidth), but no advanced matplotlib features like
+          annotations, fill_between, or secondary axes
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/plotly.yaml b/plots/line-basic/metadata/plotly.yaml
index 1354da8c1f..ca62c74102 100644
--- a/plots/line-basic/metadata/plotly.yaml
+++ b/plots/line-basic/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
   - Grid lines could be slightly more visible (current alpha 0.1 is quite subtle)
   - Could leverage more distinctive Plotly features like spike lines, range selector,
     or annotations
+  image_description: 'The plot displays a basic line chart showing monthly temperature
+    data across 12 months (January to December). The line is rendered in a dark blue
+    color (#306998) with circular markers at each data point. The chart shows a clear
+    seasonal temperature pattern: cold temperatures (~4°C) in winter months (Jan,
+    Feb, Dec), rising through spring, peaking in July (~29°C), and declining through
+    autumn. The title "line-basic · plotly · pyplots.ai" is centered at the top. The
+    X-axis shows month abbreviations (Jan-Dec), and the Y-axis displays "Temperature
+    (°C)" with values ranging from approximately 5-30. Subtle gray grid lines appear
+    on both axes. The background is clean white (plotly_white template).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes (title 40pt, axis labels 36pt, ticks 28pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (5) and marker size (18) are well-suited for 12 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) provides good contrast and is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions with adequate margins; slight excess whitespace
+          on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "Temperature (°C)"; X-axis "Month" is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate; no legend needed for single line
+          (but grid alpha could be slightly more visible)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot connecting data points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (sequential), Y=temperature (continuous)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Clean minimal design, clear axis labels, grid lines, markers on data
+          points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible with appropriate temperature range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single line; no legend clutter
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "line-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal trend clearly with realistic variation; could show
+          more noise/variation in readings
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings with realistic seasonal sinusoidal pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range ~4-29°C is realistic for temperate climate annual
+          cycle
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses hovertemplate for interactivity in HTML output, but could leverage
+          more Plotly features like annotations, range sliders, or spike lines
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/plotnine.yaml b/plots/line-basic/metadata/plotnine.yaml
index eafdd67d43..e7274cff4c 100644
--- a/plots/line-basic/metadata/plotnine.yaml
+++ b/plots/line-basic/metadata/plotnine.yaml
@@ -25,3 +25,180 @@ review:
   - X-axis shows decimal tick values (2.5, 5.0, 7.5, 10.0, 12.5) instead of integer
     months (1-12); should use scale_x_continuous with breaks=range(1,13) for cleaner
     month representation
+  image_description: The plot displays a basic line chart showing monthly temperature
+    data over a 12-month period. The line is rendered in a blue color (#306998) connecting
+    12 data points, each marked with circular markers of the same blue color. The
+    X-axis shows "Month" with decimal tick labels (2.5, 5.0, 7.5, 10.0, 12.5), and
+    the Y-axis shows "Temperature (°C)" ranging from approximately 5 to 27 degrees.
+    The title "line-basic · plotnine · pyplots.ai" is displayed at the top. The plot
+    uses a minimal theme with subtle gray grid lines and a clean white background.
+    The data follows a realistic seasonal temperature pattern - low in winter months
+    (1-2, 11-12), rising through spring, peaking in summer (month 7), and declining
+    through autumn.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes (24pt title, 20pt axis labels, 16pt tick labels)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (2.5) and point size (6) are well-suited for 12 data points,
+          providing excellent visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) has good contrast against white background,
+          no colorblind issues with single-series data
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of 16:9 aspect
+          ratio
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has units "Temperature (°C)" but X-axis "Month" could benefit
+          from showing month names or clearer integer ticks instead of decimals (2.5,
+          5.0, etc.)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but the X-axis decimal ticks (2.5, 5.0,
+          7.5) are awkward for monthly data - should show integers 1-12
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct basic line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (month) and Y (temperature) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has line connecting points, markers on data points, grid lines, clear
+          axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-basic · plotnine · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows complete seasonal cycle with rising trend, peak, and decline
+          - demonstrates line plot's ability to show trends over time
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature data is a classic, relatable use case for line
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (5-27°C) are realistic for a temperate climate
+          annual cycle
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good use of plotnine''s grammar of graphics: ggplot + aes + geom_line
+          + geom_point + labs + theme_minimal + theme customization with element_text
+          and element_line'
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/pygal.yaml b/plots/line-basic/metadata/pygal.yaml
index a04cc053b4..5c46b3bc3b 100644
--- a/plots/line-basic/metadata/pygal.yaml
+++ b/plots/line-basic/metadata/pygal.yaml
@@ -23,3 +23,171 @@ review:
   - Font sizes in the library rules suggest smaller values (28/18/16) but implementation
     uses larger (72/48/42) - while this works for the large canvas, it deviates from
     the template
+  image_description: The plot displays a clean line chart showing monthly average
+    temperatures for a temperate climate. A single soft blue line (#306998) connects
+    12 data points across the months (Jan-Dec), forming the characteristic bell curve
+    of seasonal temperature variation. Small circular markers are visible at each
+    data point. The title "line-basic · pygal · pyplots.ai" is centered at the top
+    in gray text. The Y-axis is labeled "Temperature (°C)" with values ranging from
+    ~2 to 22, and the X-axis is labeled "Month" with all 12 month abbreviations clearly
+    displayed. Subtle horizontal dotted grid lines aid readability. A legend reading
+    "Average Temperature" appears at the bottom left. The overall design is clean
+    with a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels slightly
+          smaller than ideal
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots visible but could be slightly larger; line width appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, excellent contrast on white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned with good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Temperature (°C)", "Month"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but legend placement at bottom-left is awkward
+          and small
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (sequential), Y=temperature (continuous) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Line connecting points, markers on data points, grid lines present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible, Y-axis range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled "Average Temperature"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear seasonal trend with peak in summer; could show more variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature data is a classic, real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (2.3°C to 22.1°C) realistic for temperate climate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses custom Style class and pygal-specific options (show_dots, dots_size,
+          stroke_style), but doesn't leverage tooltips or other interactive SVG features
+          that make pygal distinctive
+  verdict: APPROVED
diff --git a/plots/line-basic/metadata/seaborn.yaml b/plots/line-basic/metadata/seaborn.yaml
index dc945245bb..62b8cd91de 100644
--- a/plots/line-basic/metadata/seaborn.yaml
+++ b/plots/line-basic/metadata/seaborn.yaml
@@ -25,3 +25,174 @@ review:
     variation to show real-world noise
   - Does not leverage seaborn distinctive statistical features (confidence intervals,
     error bands)
+  image_description: The plot displays a line chart showing monthly temperature data
+    over 12 months. A single blue line (#306998) connects 12 data points with circular
+    markers. The line shows a clear sinusoidal pattern typical of seasonal temperature
+    variation - starting low around 4°C in January/February, peaking at approximately
+    30°C in July, then declining back to about 4°C in December. The x-axis is labeled
+    "Month" with integer values 1-12, and the y-axis is labeled "Temperature (°C)"
+    ranging from approximately 5 to 30. The title correctly displays "line-basic ·
+    seaborn · pyplots.ai". A subtle dashed grid is visible in the background. The
+    overall layout is clean with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 and marker size of 12 are perfectly adapted for 12 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, 16:9 aspect ratio well utilized
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Month" and "Temperature (°C)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: false
+        comment: → 0/2 - Grid at alpha=0.3 is good, but no legend present (though
+          not strictly required for single line)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Month (sequential), Y=Temperature (continuous) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Single line, clear axis labels, grid lines, markers on data points
+          - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all 12 data points without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single line, legend not required
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows trend clearly, but could demonstrate more variation patterns
+          (e.g., some noise or anomalies)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings are a perfect real-world scenario mentioned
+          in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range ~4-30°C is realistic for temperate climate annual
+          variation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot which is seaborn's main line plotting function,
+          but doesn't leverage more distinctive features like confidence intervals
+          or hue grouping
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/altair.yaml b/plots/line-confidence/metadata/altair.yaml
index 56499fb8f5..eb2cdd869f 100644
--- a/plots/line-confidence/metadata/altair.yaml
+++ b/plots/line-confidence/metadata/altair.yaml
@@ -26,3 +26,171 @@ review:
   - Y-axis starts at 0 but data ranges ~90-180, wasting nearly half the vertical canvas
     space (the code calculates y_domain but Altair appears to ignore it or it is not
     applied correctly to all layers)
+  image_description: The plot displays a line chart with a confidence interval on
+    a white background. A dark navy blue line (#306998) shows the predicted mean values,
+    surrounded by a light blue semi-transparent band representing the 95% confidence
+    interval. The x-axis is labeled "Day" (ranging 0-50) and the y-axis is labeled
+    "Predicted Value" (ranging 0-200). The title "line-confidence · altair · pyplots.ai"
+    appears at the top center. The confidence band appropriately widens as time progresses,
+    simulating forecast uncertainty. Point markers are visible along the line. However,
+    **no legend is visible** in the rendered image despite the code attempting to
+    create one. The y-axis starts at 0 creating significant whitespace below the data
+    (which ranges ~90-180).
+  criteria_checklist:
+    visual_quality:
+      score: 31
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clear and readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and band are clearly visible, point markers present but small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses single color scheme, colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 0
+        max: 5
+        passed: false
+        comment: Y-axis starts at 0 instead of data range, creating ~50% wasted vertical
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (good), but legend is NOT visible despite spec requirement
+    spec_compliance:
+      score: 20
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Day, Y=Value, band=Lower/Upper correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Missing visible legend (spec explicitly requires "legend that clearly
+          identifies both the central line and confidence band")
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible (though Y-axis range is suboptimal)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend not rendered/visible in output image
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-confidence · altair · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows widening confidence interval over time, trend, and variability
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Time series forecast scenario is plausible but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 50 days, values ~100-170, realistic for forecasting
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: The resolve_legend approach with transform_calculate is complex but
+          the legend doesn't render properly. The interactive HTML version with tooltips
+          is good, but the static PNG, which is the primary output, lacks the legend.
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/bokeh.yaml b/plots/line-confidence/metadata/bokeh.yaml
index 170f7b1b3d..1d1edc3803 100644
--- a/plots/line-confidence/metadata/bokeh.yaml
+++ b/plots/line-confidence/metadata/bokeh.yaml
@@ -22,3 +22,177 @@ review:
   weaknesses:
   - Legend placement in top-left corner is cramped and slightly overlaps with plot
     area visually
+  image_description: The plot displays a line chart with a 95% confidence interval
+    band on a light gray (#fafafa) background. The title "line-confidence · bokeh
+    · pyplots.ai" appears at the top left in a reasonably sized font. A legend in
+    the top-left corner identifies "Prediction" (dark blue line) and "95% Confidence
+    Interval" (light blue shaded band). The x-axis is labeled "Time (units)" ranging
+    from 0 to 10, and the y-axis is labeled "Predicted Value" ranging from approximately
+    0 to 20. The central prediction line shows an upward curving trend with some noise,
+    starting around 2 and ending around 16. The confidence band is semi-transparent
+    and widens progressively as time increases, effectively demonstrating growing
+    uncertainty in predictions. Dashed grid lines are subtle in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, though tick labels could be slightly
+          larger for optimal viewing at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 5 is appropriate, confidence band is clearly visible
+          with good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though legend could be positioned more
+          optimally (slightly crowds corner)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Time (units)", "Predicted Value"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend placement crowds the top-left
+          corner slightly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (time), Y (prediction), y_lower/y_upper correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: central line, shaded band, legend identifying
+          both'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Prediction" and "95% Confidence Interval"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-confidence · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows widening confidence interval and trend, but could show more
+          variation patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible model prediction scenario with appropriate uncertainty
+          growth
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable, though y-axis starting at 0 leaves some empty
+          space at bottom
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (minor, but adds unnecessary
+          output)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses varea for confidence band and ColumnDataSource, but could leverage
+          more Bokeh-specific features like HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/highcharts.yaml b/plots/line-confidence/metadata/highcharts.yaml
index 8ddd776f76..61f5eafc7c 100644
--- a/plots/line-confidence/metadata/highcharts.yaml
+++ b/plots/line-confidence/metadata/highcharts.yaml
@@ -29,3 +29,175 @@ review:
     for better association
   - Does not leverage interactive Highcharts features (though this is acceptable for
     static PNG output)
+  image_description: The plot displays a line chart with a confidence interval on
+    a white background. A dark blue (#306998) central line represents the "Prediction"
+    values, showing an upward trend with sinusoidal oscillations over 50 days. The
+    light blue semi-transparent shaded band (30% opacity) represents the "95% Confidence
+    Interval" which visibly widens as time increases, illustrating growing uncertainty.
+    The title "line-confidence · highcharts · pyplots.ai" appears at the top in bold,
+    with a subtitle "Model Predictions with 95% Confidence Interval" below it. The
+    Y-axis is labeled "Predicted Value" (ranging from ~80-280), and the X-axis is
+    labeled "Day" (1-50). A vertical legend in the top-right corner identifies both
+    series. Dashed gray grid lines enhance readability.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend are all clearly readable
+          at the high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width is appropriate (6px), confidence band is visible with
+          good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses single color family (blue) which is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; minor issue with legend being slightly
+          far from chart
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Day", "Predicted Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style; legend well-positioned but could
+          be closer to data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval band
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (days), Y (predictions), lower/upper bounds correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: solid central line, semi-transparent
+          band, legend, grid'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Prediction" line and "95% Confidence
+          Interval" band
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-confidence · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows key features: trend with oscillation, widening uncertainty
+          over time'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Model predictions scenario is plausible; subtitle provides context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible range (~100-250), 50 data points appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic output
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic AreaRangeSeries and LineSeries; does not leverage Highcharts-specific
+          features like tooltips, zoom, or interactive hover states
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/letsplot.yaml b/plots/line-confidence/metadata/letsplot.yaml
index 0b3765b31e..11b303c490 100644
--- a/plots/line-confidence/metadata/letsplot.yaml
+++ b/plots/line-confidence/metadata/letsplot.yaml
@@ -25,3 +25,174 @@ review:
     requires this)
   - Grid lines render with dark outline on the confidence band edge rather than being
     purely subtle
+  image_description: The plot displays a 24-month sales forecast with a confidence
+    interval. It features a dark blue trend line connecting yellow circular data points,
+    surrounded by a light blue semi-transparent ribbon showing the 95% confidence
+    band. The confidence interval widens progressively over time (from ~±10 at month
+    1 to ~±20 at month 24), effectively demonstrating forecast uncertainty growth.
+    The y-axis shows "Sales (thousands)" ranging from ~50-125, and the x-axis shows
+    "Month" from 0-24 with tick marks at intervals of 3. The title "line-confidence
+    · letsplot · pyplots.ai" appears at the top. Subtle gray grid lines provide reference
+    without overwhelming the data. The overall layout is clean with good use of the
+    canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and points are visible, though points could be slightly larger
+          for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight excess whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Sales (thousands)", X-axis has descriptive "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but **no legend** to identify the
+          confidence band vs. central line as spec requires
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Month) and Y (Sales/bounds) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Has line, band, and points, but **missing legend** that spec explicitly
+          requires
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend present)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "line-confidence · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows widening confidence interval over time and seasonal pattern,
+          but could show more variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales forecast is a plausible, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values (50-125 thousands) and 24-month horizon are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_ribbon, geom_line, geom_point, theme_minimal),
+          but could leverage more lets-plot-specific capabilities such as tooltips
+          or interactivity
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/matplotlib.yaml b/plots/line-confidence/metadata/matplotlib.yaml
index ba4a70751b..53bbfd2a79 100644
--- a/plots/line-confidence/metadata/matplotlib.yaml
+++ b/plots/line-confidence/metadata/matplotlib.yaml
@@ -21,3 +21,162 @@ review:
   weaknesses:
   - Could use contrasting colors for line vs band as suggested in spec (e.g., dark
     blue line with lighter blue band - currently same color)
+  image_description: The plot displays a 30-day temperature forecast with a confidence
+    interval. A dark blue solid line (labeled "Forecast Mean") shows the central temperature
+    trend, ranging from approximately 16°C to 23°C over 30 days with an overall upward
+    trend and sinusoidal variation. A semi-transparent light blue shaded band (labeled
+    "95% Confidence Interval") surrounds the central line, widening progressively
+    as days increase to reflect growing forecast uncertainty. The title "line-confidence
+    · matplotlib · pyplots.ai" appears at the top. X-axis is labeled "Days Ahead"
+    (1-30), Y-axis is labeled "Temperature (°C)" (approximately 14-29). A legend in
+    the upper left corner identifies both elements. A subtle dashed grid aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 3 is appropriate, confidence band clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Days Ahead" are descriptive with units'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval (fill_between)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=days, Y=temperature, lower/upper bounds correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Central line, shaded band, legend identifying both elements
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Forecast Mean" and "95% Confidence Interval"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "line-confidence · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows widening confidence interval over time, sinusoidal variation
+          in trend
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Temperature forecast is realistic; values plausible for spring weather
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range 14-29°C is realistic for weather forecasting
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of fill_between for confidence band, Axes methods properly
+          used; could explore additional matplotlib features
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/plotly.yaml b/plots/line-confidence/metadata/plotly.yaml
index ba0b497718..cbe533cfe0 100644
--- a/plots/line-confidence/metadata/plotly.yaml
+++ b/plots/line-confidence/metadata/plotly.yaml
@@ -23,3 +23,173 @@ review:
   - Legend in upper-left overlaps slightly with the confidence band at its peak
   - Could leverage Plotly-specific features like custom hover templates to show exact
     values on mouseover
+  image_description: The plot displays a line chart showing monthly temperature data
+    over 50 months. A dark blue solid line represents the mean temperature, oscillating
+    between approximately 7°C and 25°C with clear seasonal periodicity (annual cycles).
+    The line is surrounded by a light blue semi-transparent shaded band representing
+    the 95% confidence interval. The confidence band widens slightly over time, demonstrating
+    increasing uncertainty. The title "line-confidence · plotly · pyplots.ai" is centered
+    at the top. The x-axis is labeled "Month" (ranging 0-50), and the y-axis is labeled
+    "Temperature (°C)" (ranging 0-30). A legend in the upper-left corner identifies
+    "95% Confidence Interval" and "Mean Temperature". Grid lines are subtle gray.
+    The overall layout is clean with a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at large
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width is appropriate (width=4), confidence band clearly visible
+          with good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue color scheme is colorblind-safe; single-hue approach works well
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units; "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend placement in upper-left slightly
+          overlaps with confidence band
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Central line, shaded confidence band, legend identifying both elements
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Mean Temperature" and "95% Confidence
+          Interval"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-confidence · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows seasonal variation, widening uncertainty over time, realistic
+          temperature range
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature forecast is a perfect real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (7-25°C) are realistic for a temperate climate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Does not leverage Plotly's interactivity features in the static output;
+          could use hover templates or annotations
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/plotnine.yaml b/plots/line-confidence/metadata/plotnine.yaml
index b792b790c7..6bdfc55acd 100644
--- a/plots/line-confidence/metadata/plotnine.yaml
+++ b/plots/line-confidence/metadata/plotnine.yaml
@@ -24,3 +24,177 @@ review:
   - Legend shows two separate entries due to using both fill and color aesthetics
     - could be combined into single legend item
   - X-axis label Month lacks temporal context
+  image_description: The plot displays a line chart with a confidence interval band
+    showing monthly sales forecast data over 24 months. The central trend line is
+    a solid dark blue (#306998) line that shows an upward trend with seasonal fluctuations,
+    starting around 55 and ending near 105 thousand. The confidence interval is rendered
+    as a semi-transparent light blue shaded band (alpha 0.3) surrounding the central
+    line. The band widens progressively as months increase, correctly representing
+    growing uncertainty in forecasts over time. The title "line-confidence · plotnine
+    · pyplots.ai" appears at the top in large text. Axis labels show "Month" (x-axis,
+    0-25 range) and "Sales (thousands)" (y-axis, 60-120 range). A legend on the right
+    side displays "95% CI" for the shaded band and "Forecast" for the line. The plot
+    uses a clean minimal theme with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear at 24pt, axis labels at 20pt, tick labels
+          at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout throughout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line is clearly visible with size=1.5, confidence band appropriately
+          sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue) with transparency variation - no colorblind
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned cleanly
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has units "(thousands)" but X-axis "Month" could specify the
+          unit context better
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle with minimal theme; the legend's empty-string labels
+          for fill/color are acceptable, but it shows two separate legend items
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval band
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Month, Y=Sales, y_lower/y_upper correctly mapped to ribbon
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Central line, shaded confidence band, legend identifying both elements
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 24 data points visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "95% CI" and "Forecast"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-confidence · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend with seasonality and widening CI over time, demonstrates
+          key features well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales forecast is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values are reasonable for sales in thousands; starting at ~55k
+          and ending at ~105k is plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s grammar of graphics: ggplot + aes +
+          geom_ribbon + geom_line + scale_manual + theme_minimal + theme customization'
+  verdict: APPROVED
diff --git a/plots/line-confidence/metadata/seaborn.yaml b/plots/line-confidence/metadata/seaborn.yaml
index ea19adc85d..9a64fd369f 100644
--- a/plots/line-confidence/metadata/seaborn.yaml
+++ b/plots/line-confidence/metadata/seaborn.yaml
@@ -24,3 +24,171 @@ review:
   - Uses matplotlib fill_between instead of seaborn native confidence interval features
     (sns.lineplot can compute CI automatically from raw data)
   - Legend position slightly overlaps with early confidence band data
+  image_description: The plot displays a 24-month sales forecast with a 95% confidence
+    interval. A dark blue (#306998) solid line shows the central forecast trend, exhibiting
+    an upward trajectory with clear seasonal oscillations. The forecast is surrounded
+    by a semi-transparent light blue shaded band representing the confidence interval,
+    which visibly widens as months progress (demonstrating growing uncertainty over
+    time). The title "line-confidence · seaborn · pyplots.ai" is positioned at the
+    top. The X-axis is labeled "Month" (ranging 0-25), and the Y-axis is labeled "Sales
+    (Units)" (ranging approximately 80-200). A legend in the upper left corner identifies
+    the confidence interval and forecast line. Subtle dashed grid lines aid readability.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (3) appropriate, confidence band alpha (0.3) well-balanced
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Month" and "Sales (Units)" are descriptive, but "Month" could include
+          context (e.g., "Month (from Jan 2024)")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but legend overlaps slightly
+          with the confidence band at months 1-4
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with confidence interval
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=forecast values, bounds correctly computed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Central line, shaded confidence band, legend identifying both elements
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "95% Confidence Interval" and "Forecast"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "line-confidence · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows upward trend, seasonality, and growing uncertainty; could demonstrate
+          more variation in confidence band width
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales forecast is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values (80-200 units) are plausible, though units could be more specific
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn/matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png", which is the correct output filename
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot and sns.set_style, but fill_between is matplotlib.
+          Could use seaborn's native confidence interval support (e.g., sns.lineplot
+          with ci parameter or sns.regplot)
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/altair.yaml b/plots/line-filled/metadata/altair.yaml
index b0b93be837..2921a4d4b6 100644
--- a/plots/line-filled/metadata/altair.yaml
+++ b/plots/line-filled/metadata/altair.yaml
@@ -24,3 +24,173 @@ review:
   - Axis labels lack units (could be "Daily Visitors (count)" or "Day of Month (day)")
   - The gradient direction could potentially be inverted (darker at line, lighter
     toward baseline) to better emphasize the data
+  image_description: The plot displays a filled line chart showing "Daily Visitors"
+    (y-axis, ranging from 0 to 9,000) over "Day of Month" (x-axis, days 1-30). The
+    title "line-filled · altair · pyplots.ai" is displayed at the top. A blue line
+    (#306998) traces the daily visitor data, which shows an overall upward trend with
+    weekly cyclical patterns. The area beneath the line is filled with a gradient
+    from semi-transparent blue at the bottom to lighter blue near the line. The y-axis
+    starts at 0 (proper baseline), and the data shows realistic website traffic ranging
+    from approximately 4,600 to 8,700 visitors. The grid is subtle with dashed lines
+    at approximately 0.3 opacity.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line is clearly visible with 3px stroke, fill gradient is well-executed
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue), colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, though baseline at y=0 creates some empty space at bottom;
+          acceptable for area charts
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Day of Month", "Daily Visitors") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line/area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=continuous time (days), Y=numeric (visitors)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (gradient 0.1-0.4 alpha), visible line on top,
+          fill color matches line, baseline at y=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis properly starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-filled · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows magnitude emphasis, trend over time, realistic variation;
+          minor: could show more dramatic peaks/valleys'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible (4,600-8,700 daily visitors); the growth trend
+          and weekly patterns are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair features: alt.Gradient for fill gradient,
+          alt.Title, mark_area with line property, tooltip encoding, configure_axis
+          for styling'
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/bokeh.yaml b/plots/line-filled/metadata/bokeh.yaml
index 7ea5c4c12b..715a2f6269 100644
--- a/plots/line-filled/metadata/bokeh.yaml
+++ b/plots/line-filled/metadata/bokeh.yaml
@@ -23,3 +23,172 @@ review:
   - Axis labels lack units (e.g., "Website Visitors (count)" or "Month of Year")
   - Could use more distinctive Bokeh features like CustomJS, band annotations, or
     span annotations to highlight peaks
+  image_description: The plot shows a filled line chart displaying website traffic
+    (visitors) over 12 months. The area beneath the line is filled with a semi-transparent
+    blue color (#306998 with alpha 0.4). A solid blue line traces the top of the filled
+    area, with small circular markers at each data point. The chart shows a seasonal
+    pattern peaking around month 7 (July) at approximately 81,000 visitors, with lower
+    values at the start (~40,000) and end (~57,000) of the year. The background is
+    a light gray (#fafafa), and there is a subtle dashed grid. The title "line-filled
+    · bokeh · pyplots.ai" appears in the top-left. The x-axis shows months 1-12 labeled
+    as "Month", and the y-axis shows "Website Visitors" ranging from 0 to ~80,000.
+    Bokeh toolbar icons are visible in the top-right corner.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable but could be slightly
+          larger for the canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line, fill, and markers are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with toolbar in corner
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (e.g., "Website Visitors (count)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha 0.3, no legend needed
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line/area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (visitors) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill, visible line on top, fill matches line color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0 (baseline)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-filled · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation and trend well, demonstrates magnitude emphasis
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for website traffic, though seasonal amplitude
+          is slightly exaggerated
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and HoverTool for interactivity, varea for
+          fill; could leverage more Bokeh-specific features like CustomJS callbacks
+          or more advanced tooltips
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/highcharts.yaml b/plots/line-filled/metadata/highcharts.yaml
index 64ce81d704..55b7e843e3 100644
--- a/plots/line-filled/metadata/highcharts.yaml
+++ b/plots/line-filled/metadata/highcharts.yaml
@@ -23,3 +23,177 @@ review:
   - Y-axis label could benefit from units (e.g., Page Views thousands)
   - Legend positioned far from data in top-right corner; could be integrated closer
     to the chart
+  image_description: The plot displays a filled line chart (area chart) showing "Monthly
+    Website Traffic" over 12 months (Jan-Dec). The chart uses a blue color (#306998)
+    with a gradient fill that fades from semi-transparent blue at the top to nearly
+    transparent at the bottom. Data points are marked with small circular markers
+    with white borders. The line shows seasonal variation with a peak around March-April
+    (~72k page views) and a trough around September (~40k). The title "line-filled
+    · highcharts · pyplots.ai" appears at the top in bold, with a subtitle "Monthly
+    Website Traffic" below it. The Y-axis shows "Page Views" with values from 0 to
+    ~77.5k, and the X-axis shows month abbreviations. A legend "Website Traffic" appears
+    in the top-right corner.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are readable, though Y-axis label "Page Views" appears
+          slightly rotated and could be larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line is well-defined, markers are appropriately sized, fill gradient
+          is attractive
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but Y-axis starting at 0 creates large empty space below
+          the data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Page Views" is descriptive but lacks units; "Month" is generic'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, but legend is far from the data in the top-right
+          corner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line/area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (months), Y is numeric value (traffic)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (~0.4 opacity), visible line on top, fill matches
+          line color, baseline at y=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0 as spec requires
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled "Website Traffic"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-filled · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation, growth trend, and magnitude emphasis; could
+          show more dramatic peaks/valleys
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a perfect, neutral, realistic scenario for this
+          plot type
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (40k-72k page views) are realistic for a mid-sized website
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but also "plot.html" (acceptable for interactive
+          library)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses gradient fill, proper area series, markers with styling, but
+          doesn't leverage advanced Highcharts features like tooltips configuration
+          or animation options
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/letsplot.yaml b/plots/line-filled/metadata/letsplot.yaml
index 9b70506e42..3e702a10fc 100644
--- a/plots/line-filled/metadata/letsplot.yaml
+++ b/plots/line-filled/metadata/letsplot.yaml
@@ -21,3 +21,172 @@ review:
   weaknesses:
   - Axis labels lack units (e.g., "Month (2024)" or "Visitors (thousands)")
   - Data variation could be more pronounced to better showcase the filled area effect
+  image_description: The plot displays a filled line chart showing monthly website
+    traffic over 12 months. The chart uses a blue color (#306998) with a semi-transparent
+    fill (alpha ~0.4) below the line. A solid blue line traces the data points, with
+    blue circular markers at each month. The y-axis shows "Website Visitors" ranging
+    from 0 to approximately 70,000, and the x-axis shows "Month" with values 1-12.
+    The title "line-filled · letsplot · pyplots.ai" appears at the top. The plot uses
+    a minimal theme with subtle gray horizontal grid lines on a white background.
+    The data shows variation with a peak around month 4 (~70,000 visitors) and a dip
+    at month 2 (~55,000 visitors).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width and point markers are appropriately sized for 12 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, data fills the plot area well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Month" and "Website Visitors" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate; minor grid disabled which is good;
+          no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line (area) chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=continuous (months), Y=numeric (visitors)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill, visible line on top, fill matches line color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts from 0 (correct baseline)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-filled · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in the data with peaks and valleys; could have more
+          dramatic variation to better showcase the fill
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a perfect real-world scenario for this plot type
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for website traffic (50k-70k monthly visitors)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses geom_area, geom_line, geom_point, theme_minimal correctly but
+          doesn't showcase lets-plot specific features like tooltips or interactivity
+          in the plot design
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/matplotlib.yaml b/plots/line-filled/metadata/matplotlib.yaml
index b32e2d8cd9..34ea2b857a 100644
--- a/plots/line-filled/metadata/matplotlib.yaml
+++ b/plots/line-filled/metadata/matplotlib.yaml
@@ -23,3 +23,172 @@ review:
   - Axis labels lack units (could use "Month of Year" or "Website Visitors (thousands)")
   - Could add markers at data points to show exact monthly values
   - Library features score limited - could explore gradient fills or secondary styling
+  image_description: The plot displays a filled line chart showing "Website Visitors"
+    (y-axis) over 12 months from January to December (x-axis). The fill and line are
+    both in a blue color (#306998) with the fill being semi-transparent (alpha ~0.4).
+    The line peaks around July (~68,000 visitors) with a clear seasonal pattern -
+    lower in winter months (~35,000-40,000) and higher in summer months (~62,000-68,000).
+    The title "line-filled · matplotlib · pyplots.ai" appears at the top. Grid lines
+    are dashed with low alpha. The y-axis starts at 0 and goes to 70,000, and all
+    month abbreviations are clearly visible on the x-axis.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line thickness (3) is optimal, fill area is clearly visible with
+          good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins with tight_layout()
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Month" and "Website Visitors" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3 with dashed style, no legend needed for
+          single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line plot using fill_between + plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (continuous/time), Y=numeric values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (alpha=0.4), line visible on top, matching
+          colors, baseline at y=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "line-filled · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation well, demonstrates magnitude emphasis; could
+          show more dramatic variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic with seasonal trends is a real, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 35k-68k are realistic for medium website; could use rounder
+          numbers
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API (ax methods)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses fill_between correctly which is matplotlib-specific, but no
+          advanced features like gradient fills or custom styling
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/plotly.yaml b/plots/line-filled/metadata/plotly.yaml
index 68d23b83bf..09d8a39465 100644
--- a/plots/line-filled/metadata/plotly.yaml
+++ b/plots/line-filled/metadata/plotly.yaml
@@ -25,3 +25,174 @@ review:
   - Grid opacity (0.1) is slightly too subtle - could be 0.2-0.3 for better readability
   - Data could show more realistic noise/variability to demonstrate the plot type
     better
+  image_description: The plot shows a filled line chart displaying website traffic
+    over 12 months (Jan-Dec). The line is rendered in a dark blue color (#306998)
+    with a width of 4, and the area beneath it is filled with a semi-transparent light
+    blue (rgba with ~0.35 alpha). The chart shows a seasonal pattern with traffic
+    starting around 42K in January, rising to a peak of approximately 84K in July,
+    then declining through the fall to around 57K in December. The title "line-filled
+    · plotly · pyplots.ai" is centered at the top in dark gray. The y-axis is labeled
+    "Website Visitors (thousands)" and ranges from 0 to ~85, while the x-axis is labeled
+    "Month" with abbreviated month names. The background is white with subtle gray
+    gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (32pt), axis titles are 24pt, tick labels are 18pt
+          - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is clearly visible, fill is appropriately transparent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins (l=100, r=60, t=100,
+          b=80)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(thousands)", X-axis labeled "Month" - descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but no legend shown (showlegend=False)
+          - acceptable for single series but could have legend for completeness
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line/area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (months), Y is numeric value (traffic)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (~0.35 alpha), visible line on top, fill to
+          zero baseline
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0 (rangemode="tozero"), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series with no legend shown
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-filled · plotly · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows seasonal variation and trend well, but data is relatively smooth
+          - could show more variability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a perfect real-world scenario for filled line
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 40-85K visitors/month are realistic for a mid-sized website
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with fill="tozeroy" and hovertemplate for interactivity,
+          but could leverage more Plotly features like annotations or range slider
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/plotnine.yaml b/plots/line-filled/metadata/plotnine.yaml
index 52fabaac1e..8ccbd52ebb 100644
--- a/plots/line-filled/metadata/plotnine.yaml
+++ b/plots/line-filled/metadata/plotnine.yaml
@@ -24,3 +24,177 @@ review:
     3...12) which is confusing for monthly data
   - Y-axis label could include units (e.g., Website Visitors (thousands) or use K
     formatting)
+  image_description: The plot displays a filled line chart showing monthly website
+    visitors over a 12-month period. The area beneath the line is filled with a muted
+    blue color (#306998) at approximately 40% transparency, creating a clear area
+    chart effect. A darker blue line of the same color traces the top edge of the
+    filled region. The y-axis labeled "Website Visitors" ranges from 0 to approximately
+    70,000, and the x-axis labeled "Month" shows values from about 1 to 12.5 (displayed
+    as decimal values like 2.5, 5.0, 7.5, etc.). The title "line-filled · plotnine
+    · pyplots.ai" is prominently displayed at the top. The data shows some variation
+    with peaks around months 4 and 11, and troughs around months 2 and 8. The background
+    is clean with a subtle grid, and the overall layout is well-balanced with the
+    plot filling the canvas appropriately.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line is clearly visible (size=2), fill alpha (0.4) provides good
+          contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind concerns
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units (could be "Website Visitors
+          (thousands)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle but x-axis shows decimal months (2.5, 5.0) instead
+          of integers which is slightly confusing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line/area chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Month) and Y (Visitors) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (alpha=0.4), visible line on top, fill matches
+          line color, baseline at y=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-filled · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation over time with peaks and troughs, demonstrates the
+          filled area effect well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a neutral, realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values (50k-70k visitors) are plausible, but months shown as decimals
+          (2.5, 5.0) is odd for discrete monthly data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but uses verbose=False instead of standard approach
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_area + geom_line layering,
+          theme_minimal, and element_text customization. Good use of grammar of graphics
+          but could leverage more plotnine-specific features.
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/pygal.yaml b/plots/line-filled/metadata/pygal.yaml
index 6b38196321..9dccddf897 100644
--- a/plots/line-filled/metadata/pygal.yaml
+++ b/plots/line-filled/metadata/pygal.yaml
@@ -24,3 +24,172 @@ review:
   - Dots are functional but could be slightly larger for better visibility at full
     resolution
   - Could leverage pygal interpolation feature for smoother curves
+  image_description: The plot displays a filled line chart showing monthly website
+    traffic over 12 months (January to December). The chart uses Python Blue (#306998)
+    for both the line and the semi-transparent fill area beneath it. The title "line-filled
+    · pygal · pyplots.ai" is centered at the top. The X-axis is labeled "Month" with
+    abbreviated month names (Jan through Dec), and the Y-axis is labeled "Page Views"
+    with values ranging from approximately 41,000 to 64,000. Small dots mark each
+    data point. The fill area effectively emphasizes the magnitude of values, showing
+    a seasonal pattern with a summer peak in July (~63,000) and a holiday spike in
+    December (~64,000). A legend reading "Website Traffic" appears in the top-left
+    corner. Horizontal grid lines are present and subtle.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Font sizes
+          are appropriate for the canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and fill are clearly visible. Dots are small but adequate.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; slight issue with legend placement in top-left.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels ("Month", "Page Views").
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend placement is awkward (top-left, outside plot area, small text
+          "=Website Traffic"). Grid is subtle but acceptable.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line plot (area chart effect).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (months), Y is numeric (page views).
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Fill is semi-transparent, line visible on top, fill color matches
+          line.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range appropriately.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies the series.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows "{spec-id} · {library} · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows seasonal variation and trends well, but data is relatively
+          smooth without dramatic features.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a realistic scenario; seasonal patterns are plausible.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Page view values (~40k-65k) are realistic for a website.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's fill=True option and custom Style, but doesn't leverage
+          more advanced features like tooltips or interpolation.
+  verdict: APPROVED
diff --git a/plots/line-filled/metadata/seaborn.yaml b/plots/line-filled/metadata/seaborn.yaml
index 0271948b24..340ee5ac29 100644
--- a/plots/line-filled/metadata/seaborn.yaml
+++ b/plots/line-filled/metadata/seaborn.yaml
@@ -27,3 +27,171 @@ review:
   - Data could show more variety (e.g., a notable spike or dip for a special event)
     to demonstrate range of values better
   - Fill is done with matplotlib fill_between rather than a seaborn-native approach
+  image_description: The plot displays a filled line chart showing website traffic
+    over 60 days. The x-axis shows "Day" (0-60) and the y-axis shows "Website Visitors"
+    (0-8000+). A blue line (#306998) traces daily visitor counts with a semi-transparent
+    blue fill beneath it extending to the y=0 baseline. The data shows an upward trend
+    with weekly cyclical patterns (peaks roughly every 7 days). The title reads "line-filled
+    · seaborn · pyplots.ai". A legend in the upper left identifies the line as "Daily
+    Visitors". The grid is subtle with dashed lines at alpha 0.3.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is clearly visible, fill alpha 0.4 shows magnitude
+          well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but missing units (e.g., "Day" could be "Day (number)",
+          "Website Visitors" could include "(count)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha 0.3, legend well-placed in upper left
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct filled line/area chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=continuous time (days), Y=numeric value (visitors)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (alpha 0.4), visible line on top, fill color
+          matches line, baseline at y=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Daily Visitors"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows upward trend and weekly cycle, but could demonstrate more variation
+          in amplitude or include special events
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic is a neutral, realistic scenario with plausible patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 5000-8500 daily visitors is realistic for a mid-sized website
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot which is seaborn's plotting function, but fill_between
+          comes from matplotlib. Could have explored seaborn's native area plot capabilities
+          more.
+  verdict: APPROVED
diff --git a/plots/line-interactive/metadata/altair.yaml b/plots/line-interactive/metadata/altair.yaml
index ab737b74d4..f200ed9eb9 100644
--- a/plots/line-interactive/metadata/altair.yaml
+++ b/plots/line-interactive/metadata/altair.yaml
@@ -27,3 +27,174 @@ review:
   - No reset zoom button to return to full view
   - Could add .interactive() to main chart for built-in zoom/pan capabilities
   - Overview panel Y-axis label is slightly cramped/overlapping
+  image_description: 'The plot displays a dual-panel line chart visualization. The
+    main (top) panel shows daily temperature readings throughout 2024 with a blue
+    line (#306998) and point markers. The data exhibits a clear seasonal pattern:
+    temperatures start cold (around -8°C to 6°C in January), rise through spring,
+    peak in summer (reaching ~36°C in July), and decline through autumn/winter. The
+    bottom panel serves as a range selector, showing the same data in condensed form
+    with the instruction "Drag to select range." Both panels have "Date" on the x-axis
+    and "Temperature (°C)" on the y-axis. The title correctly follows the format:
+    "Daily Temperature 2024 · line-interactive · altair · pyplots.ai". Grid lines
+    are subtle, and all text is clearly readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is ~28pt, axis labels ~18-22pt, tick labels clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and points visible, though point markers could be slightly more
+          prominent at hover
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions but overview panel is relatively small
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" with units, "Date" is appropriate'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend present (not required for
+          single series)
+    spec_compliance:
+      score: 22
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive line chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, Temperature on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has hover tooltips, range selection via brush. Missing explicit zoom/pan
+          via mouse wheel and reset button (spec mentions these specifically)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis padded appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend (single series, acceptable but loses points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Temperature 2024 · line-interactive · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full year with seasonal variation, daily fluctuations, realistic
+          noise
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings - neutral, scientific, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range (-8°C to ~36°C) realistic for temperate Northern
+          Hemisphere city
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of selection_interval for brush selection, vconcat for linked
+          charts, tooltip encoding. However, could use .interactive() for additional
+          zoom/pan, which is an Altair signature feature
+  verdict: APPROVED
diff --git a/plots/line-interactive/metadata/bokeh.yaml b/plots/line-interactive/metadata/bokeh.yaml
index 146f42db4b..036a9dca0a 100644
--- a/plots/line-interactive/metadata/bokeh.yaml
+++ b/plots/line-interactive/metadata/bokeh.yaml
@@ -27,3 +27,174 @@ review:
     in requirements
   - Marker size could be reduced to 6-8 with alpha 0.6 for 365 data points to reduce
     visual clutter
+  image_description: The plot displays an interactive line chart showing daily temperature
+    readings over a full year (January 2024 to January 2025). The line is rendered
+    in Python Blue (#306998) with small circular markers at each data point that have
+    a yellow border. The chart shows a clear seasonal sinusoidal pattern with temperatures
+    ranging from approximately -5°C in winter to ~35°C in summer. The title "line-interactive
+    · bokeh · pyplots.ai" appears in blue at the top left. The y-axis is labeled "Temperature
+    (°C)" and the x-axis is labeled "Date". A legend box labeled "Daily Temperature"
+    is positioned in the upper left. The background is light gray (#FAFAFA) with subtle
+    dashed grid lines. The bokeh toolbar with zoom/pan controls is visible on the
+    right edge.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, date labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Line width of 4 and marker size of 10 are appropriate for 365 points,
+          though markers could be slightly smaller with lower alpha for this density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.2, but legend position overlaps the highest
+          data points in January 2024
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive line chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, Temperature on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has hover tooltips, zoom (wheel + box), pan, reset. Missing: range
+          selector/slider for quick navigation mentioned in spec'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 365 days visible, full temperature range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Daily Temperature" accurately describes the data'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-interactive · bokeh · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full seasonal variation, noise, both extremes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Daily temperature is a plausible scenario, though base temp of 12°C
+          suggests a specific climate zone
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range -5°C to 35°C is realistic for temperate climates
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, HoverTool with custom tooltips, interactive
+          tools (WheelZoom, BoxZoom, Pan, Reset). Could leverage RangeSlider widget
+          for enhanced navigation per spec.
+  verdict: APPROVED
diff --git a/plots/line-interactive/metadata/highcharts.yaml b/plots/line-interactive/metadata/highcharts.yaml
index 190dbb5e4b..eaaf85211d 100644
--- a/plots/line-interactive/metadata/highcharts.yaml
+++ b/plots/line-interactive/metadata/highcharts.yaml
@@ -24,3 +24,181 @@ review:
     for time series
   - Legend marker at bottom is quite small
   - Uses raw dict configuration instead of highcharts-core Python wrapper classes
+  image_description: The plot displays an interactive line chart showing server CPU
+    usage over 7 days of hourly data. The title "line-interactive · highcharts · pyplots.ai"
+    is prominently displayed at the top in bold black text, with a subtitle explaining
+    the zoom functionality. The main chart area shows a blue (#306998) line tracing
+    CPU usage percentage (Y-axis, 0-106%) against date/time (X-axis, Jan 01-08). The
+    line exhibits clear daily cyclical patterns with peaks around 55-65% and troughs
+    around 10-30%, along with visible spikes (anomalies) reaching up to ~73%. The
+    Y-axis has percentage labels, and the X-axis shows daily date labels. A legend
+    "CPU Usage" appears at the bottom center. The overall layout has a clean white
+    background with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, axis labels, and tick marks are clearly
+          visible. Font sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Date labels are spaced well.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width is appropriate (5px), markers appear on hover. The data
+          pattern is clearly visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue (#306998) on white background with good
+          contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills the canvas well with balanced margins. Spacing is properly
+          configured.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"CPU Usage (%)" with units on Y-axis, "Date and Time" on X-axis.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), but legend is placed at bottom which
+          is acceptable. However, the legend marker is very small and could be more
+          visible.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive line chart type.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Datetime on X-axis, numeric CPU % on Y-axis correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Hover tooltips (crosshair + tooltip configured), zoom (zoomType:
+          "x"), and the subtitle instructs on zoom usage.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-105%, X-axis shows full 7-day range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"CPU Usage" legend label is accurate.'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-interactive · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows daily cycles, upward trend, noise, and spike anomalies. Could
+          show more dramatic variation or multiple series.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU usage is a realistic, neutral IT monitoring scenario.
+          Values are plausible.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: CPU usage 5-100% is realistic; 168 hourly points for 7 days is sensible.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → config → HTML → screenshot.
+          No functions/classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (json, tempfile, time, urllib, Path, numpy,
+          pandas, selenium).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses manual dict config instead of highcharts-core Python library.
+          While functional, the library docs recommend the Python wrapper.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts's native zoom, crosshair, datetime formatting, and
+          interactive HTML export. However, does not use range selector or navigator
+          which are Highcharts's premium interactive features for time series.
+  verdict: APPROVED
diff --git a/plots/line-interactive/metadata/letsplot.yaml b/plots/line-interactive/metadata/letsplot.yaml
index 4c71b184d5..686fe4033d 100644
--- a/plots/line-interactive/metadata/letsplot.yaml
+++ b/plots/line-interactive/metadata/letsplot.yaml
@@ -26,3 +26,177 @@ review:
     directory as plot.png
   - Points (size=2) are slightly small for 350 data points - could be size=3 for better
     visibility in static PNG
+  image_description: The plot displays an interactive time series line chart showing
+    server response times over approximately 15 days (June 1-16, 2024). A blue line
+    (#306998) connects 350 data points, with yellow/gold circular markers (#FFD43B)
+    at each observation. The Y-axis is labeled "Response Time (ms)" ranging from approximately
+    55-155ms. The X-axis shows "Date/Time" with daily tick marks. The title correctly
+    shows "line-interactive · lets-plot · pyplots.ai". The visualization clearly shows
+    daily cyclical patterns (peaks during business hours, troughs at night) and several
+    anomaly spikes around June 3, June 9-10, and June 13 that reach 125-155ms. The
+    plot uses a clean minimal theme with a subtle grid, good canvas utilization, and
+    balanced margins.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, date labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and points visible, though points could be slightly larger for
+          350 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line and yellow points provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight extra space at top margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)" and "Date/Time"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed (single series), but grid could be more subtle (appears
+          at default alpha)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive line chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=datetime, Y=response_time correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has tooltips on hover, HTML export enables zoom/pan. Missing explicit
+          range selector/slider mentioned in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present (acceptable for single series, but tooltip format
+          could include series name)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "line-interactive · lets-plot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows daily cycles, weekly patterns, upward trend, and anomaly spikes
+          - excellent variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time metrics are a perfect, neutral real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times 60-155ms are realistic for server metrics
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to script_dir instead of current working directory (should
+          be `plot.png` not `os.path.join(script_dir, "plot.png")`)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses layer_tooltips for hover, ggplot grammar, ggsave with scale
+          parameter. Could leverage more lets-plot interactive features like coord_cartesian
+          for zoom limits.
+  verdict: APPROVED
diff --git a/plots/line-interactive/metadata/plotly.yaml b/plots/line-interactive/metadata/plotly.yaml
index f41fa62597..cafcd14ee0 100644
--- a/plots/line-interactive/metadata/plotly.yaml
+++ b/plots/line-interactive/metadata/plotly.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Missing visible legend in the plot (trace has name but legend not shown)
   - Pandas import is unnecessary - could use numpy for date generation
+  image_description: 'The plot displays a line chart showing "Server Metrics · line-interactive
+    · plotly · pyplots.ai" as the title. The chart uses a blue line (#306998) to show
+    CPU Usage (%) over 7 days (Jan 1-7, 2024). The Y-axis ranges from 0 to 100% with
+    clear "CPU Usage (%)" label. The X-axis shows "Date & Time" with daily tick marks.
+    The data exhibits daily cyclical patterns with peaks reaching ~65-75% and troughs
+    around 10-30%. Notable interactive features visible: range selector buttons (1d,
+    3d, All) in top-left with "All" highlighted in yellow, and a range slider at the
+    bottom showing a miniature version of the full dataset. The layout is clean with
+    a white background, subtle grid lines, and well-proportioned margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 2.5 perfectly suited for 168 data points, shows patterns
+          clearly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue line, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills ~60% of canvas with range slider below
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"CPU Usage (%)" with units, "Date & Time" descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but no legend shown for the "CPU Usage"
+          trace
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive line chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=datetime, Y=numeric correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hover tooltips, zoom, pan, range selector, range slider all implemented
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-105 shows all data including spikes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible legend in the chart (trace has name="CPU Usage" but legend
+          not displayed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format with spec-id · library · pyplots.ai (includes
+          contextual "Server Metrics")
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows daily cycles, upward trend, noise, AND anomaly spikes - excellent
+          coverage of time series patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU monitoring is a real, neutral, commonly understood scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: CPU usage 5-100% with realistic base ~35%, daily variations of ±20%,
+          perfect
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: pandas imported but only used for date_range; numpy could do this
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Plotly API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Plotly''s interactive features: rangeslider, rangeselector
+          buttons, hovertemplate with custom formatting, modebar customization, drag
+          modes'
+  verdict: APPROVED
diff --git a/plots/line-interactive/metadata/pygal.yaml b/plots/line-interactive/metadata/pygal.yaml
index cc27002912..7c21015b9f 100644
--- a/plots/line-interactive/metadata/pygal.yaml
+++ b/plots/line-interactive/metadata/pygal.yaml
@@ -27,3 +27,179 @@ review:
     is a limitation of the PNG format, not a code issue
   - Single data series limits feature demonstration; could show multiple locations
     or metrics for comparison
+  image_description: The plot displays a line chart showing daily temperature data
+    for 2024. The chart uses a blue color (#306998 - Python Blue) for the line with
+    small dots marking each data point. The y-axis shows "Temperature (°C)" ranging
+    from approximately -2 to 36 degrees. The x-axis shows "Date" with major labels
+    approximately every 30 days (Jan 01, Jan 31, Mar 01, Mar 31, etc.) rotated at
+    45 degrees. The title reads "Daily Temperature 2024 · line-interactive · pygal
+    · pyplots.ai" at the top. A legend labeled "Temperature" appears at the bottom.
+    The data shows a clear seasonal sinusoidal pattern with winter lows around 0-8°C
+    and summer highs around 25-36°C, with daily noise variation creating the jagged
+    appearance. The background is white with subtle horizontal grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clear and readable; tick labels adequately
+          sized for 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; x-axis labels well-spaced with major labels
+          every 30 days
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line clearly visible; dots appropriately sized for 365 data points;
+          line could be slightly thicker for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single series in blue is colorblind-safe; good contrast against white
+          background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; plot fills reasonable canvas area; slight margin
+          imbalance at bottom with legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Temperature (°C)" and "Date"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle horizontal grid lines; legend well-positioned at bottom
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive line chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, temperature on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Hover tooltips present in HTML output; however PNG is static - zoom/pan/range
+          selection only work in HTML version
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year of data shown with appropriate axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels the temperature series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Daily Temperature 2024 · line-interactive
+          · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation with daily noise; demonstrates large time
+          series handling; could show multiple series for richer comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Weather station temperature data is a neutral, real-world scenario
+          perfect for time series
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (-2°C to 36°C annual range is plausible
+          for temperate climate)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for consistent output
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, pygal, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (both are acceptable for pygal)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of pygal's Style customization, custom tooltips with formatted
+          labels, and dual SVG/PNG/HTML output; could leverage more pygal-specific
+          features like built-in animations or explicit interactivity configurations
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/altair.yaml b/plots/line-loss-training/metadata/altair.yaml
index e5e04d1b7e..b1c7a9165e 100644
--- a/plots/line-loss-training/metadata/altair.yaml
+++ b/plots/line-loss-training/metadata/altair.yaml
@@ -26,3 +26,177 @@ review:
   - Legend title "Curve Type" is generic; could be more descriptive
   - Points on every epoch (50 points per curve) add visual noise; could use opacity
     or fewer markers
+  image_description: 'The plot displays two line curves on a white background with
+    a subtle grid. The title "line-loss-training · altair · pyplots.ai" is centered
+    at the top in large black text. The x-axis is labeled "Epoch" (ranging from 0
+    to 50) and the y-axis is labeled "Cross-Entropy Loss" (ranging from 0.0 to 2.8).
+    Two curves are shown: a blue line (Training Loss) starting at ~2.5 and exponentially
+    decaying to ~0.15, and a yellow/gold line (Validation Loss) starting at ~2.7,
+    decaying then showing overfitting behavior by rising slightly after epoch 25.
+    A red diamond marker at approximately epoch 30 indicates "Min Val Loss (Epoch
+    30)" with a red text annotation. The legend in the top-right corner shows "Curve
+    Type" with Training Loss (blue) and Validation Loss (yellow). Points are marked
+    along both lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (28pt), axis labels are 22pt, tick labels 18pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are thick (strokeWidth=3), points are visible, but points at
+          50 epochs create slight visual density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and have
+          excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, legend is appropriately positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis specifies "Cross-Entropy Loss" as required by spec, X-axis
+          is "Epoch"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid opacity is subtle (0.3), legend is well-placed but labeled "Curve
+          Type" instead of something more specific
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart showing training/validation loss curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Epoch, Y=Loss values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors, legend, both curves, and optional minimum validation
+          loss marker
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Training Loss and Validation Loss
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows required format "line-loss-training · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows exponential decay, overfitting behavior (val loss rising after
+          epoch 25), optimal stopping point
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Neural network training scenario is plausible, but 50 epochs is on
+          the shorter end for typical training
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Loss values (0.15 to 2.7) are realistic for cross-entropy loss
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" which is correct, but also saves HTML
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses layered grammar (lines + points + marker + text), proper encoding
+          types, but doesn't use interactivity in a meaningful way for the saved output
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/highcharts.yaml b/plots/line-loss-training/metadata/highcharts.yaml
index ad9c474a9b..1e1774c1d0 100644
--- a/plots/line-loss-training/metadata/highcharts.yaml
+++ b/plots/line-loss-training/metadata/highcharts.yaml
@@ -26,3 +26,176 @@ review:
     begin; positioning at right-middle or bottom would be cleaner
   - 'Grid lines could be more subtle (current gridLineColor #e0e0e0 is visible but
     acceptable)'
+  image_description: 'The plot displays two line curves on a white background showing
+    training loss (blue with circle markers) and validation loss (yellow/gold with
+    square markers) over 50 epochs. The title "line-loss-training · highcharts · pyplots.ai"
+    appears at the top in bold, with a subtitle indicating "Optimal stopping: Epoch
+    29 (Val Loss: 0.694)". The Y-axis is labeled "Cross-Entropy Loss" ranging from
+    0 to 2.8, and the X-axis is labeled "Epoch" ranging from 0 to 50. A legend in
+    the upper right corner clearly identifies both series. Both curves start high
+    (~2.5-2.65) and decay exponentially, with the validation loss showing clear overfitting
+    behavior after approximately epoch 28 where it begins to increase while training
+    loss continues to decrease.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend all clearly readable at
+          high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths and marker sizes are well-adapted for 50 data points,
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow/gold (#FFD43B) are colorblind-safe, excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though slight extra whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Cross-Entropy Loss" specifies the loss function, "Epoch" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid lines are subtle, but legend is placed in upper right which
+          slightly overlaps the visual flow of where data begins
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot showing training curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Epochs on X-axis, loss values on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has both training and validation curves, legend, distinct colors,
+          and optimal stopping annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 50 epochs visible, Y-axis shows full loss range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Training Loss" and "Validation Loss"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `line-loss-training · highcharts · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows classic training dynamics: initial high loss, exponential
+          decay, and overfitting divergence'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Neural network training is a plausible scenario; loss values and
+          decay pattern are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Loss values 0-2.8 and 50 epochs are typical for deep learning training
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → series → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts subtitle for annotation, proper series configuration,
+          and interactive HTML export; could leverage more advanced features like
+          plotBands or annotations API
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/letsplot.yaml b/plots/line-loss-training/metadata/letsplot.yaml
index 16dd9e86fd..f65d454e53 100644
--- a/plots/line-loss-training/metadata/letsplot.yaml
+++ b/plots/line-loss-training/metadata/letsplot.yaml
@@ -23,3 +23,171 @@ review:
   - Grid lines are not visible despite using theme_minimal (could add explicit grid
     with alpha for subtle visibility)
   - The legend shows empty string for color label which works but is not ideal practice
+  image_description: 'The plot displays a training loss curve with two lines on a
+    light gray gridded background. The blue line represents "Training Loss" starting
+    at ~2.5 and decreasing smoothly to ~0.1 by epoch 100. The yellow/gold line represents
+    "Validation Loss" starting at ~2.6, decreasing initially but then showing overfitting
+    behavior after ~epoch 50, rising to ~0.6 by epoch 100. A vertical red dashed line
+    marks the optimal stopping point at approximately epoch 60, with a red diamond
+    marker at the minimum validation loss point (~0.4). The title "line-loss-training
+    · letsplot · pyplots.ai" appears in bold at the top. The legend is positioned
+    at the top, showing "Training Loss" (blue) and "Validation Loss" (yellow). Axes
+    are clearly labeled: "Epoch" (x-axis) and "Cross-Entropy Loss" (y-axis).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large, axis labels and tick marks are clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are well-sized (1.5), optimal point marker visible but could
+          be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are distinguishable, colorblind-safe combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Cross-Entropy Loss" (descriptive), X-axis has "Epoch"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is minimal theme (no visible grid lines), legend well placed
+          at top
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot for training loss curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Epoch on X, Loss on Y correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Dual curves, distinct colors, legend, optimal stopping point marked
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, epochs 1-100
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Training Loss and Validation Loss
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-loss-training · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows key features: initial high loss, training convergence, overfitting
+          divergence after epoch 50'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Neural network training is realistic, loss values plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Loss values 0-2.6 appropriate for cross-entropy, 100 epochs typical
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png but also saves plot.html (minor, acceptable)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, theme_minimal, scale_color_manual, geom_vline;
+          good but not exceptional library usage
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/matplotlib.yaml b/plots/line-loss-training/metadata/matplotlib.yaml
index a1e65229ff..a8cef1e01c 100644
--- a/plots/line-loss-training/metadata/matplotlib.yaml
+++ b/plots/line-loss-training/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
     consider upper center or outside plot
   - Could use matplotlib-specific features like fill_between for confidence intervals
     or secondary annotations
+  image_description: |-
+    The plot displays two line curves on a 16:9 canvas with a white background. The title "line-loss-training · matplotlib · pyplots.ai" appears at the top in italicized black text. The X-axis is labeled "Epoch" (ranging from 0 to 50), and the Y-axis is labeled "Cross-Entropy Loss" (ranging from 0.0 to 2.5+). Two distinct lines with markers are shown:
+    - **Training Loss** (blue line with circular markers): Starts at ~2.5 and decays exponentially to ~0.2 by epoch 50
+    - **Validation Loss** (yellow/golden line with square markers): Starts similarly high but shows overfitting behavior after epoch ~28, where it begins increasing while training loss continues to decrease
+    A large red circular marker with black border highlights "Best: Epoch 29" - the optimal early stopping point - with an arrow annotation pointing to it. The legend is positioned in the upper right corner. The grid is subtle with dashed gray lines at alpha 0.3.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines at linewidth=3 and markers at size 6 are well-sized for 50
+          data points; distinct and clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Epoch" and "Cross-Entropy Loss" are descriptive with loss function
+          specified'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle at alpha=0.3 which is good, but legend could be better
+          positioned (upper right overlaps slightly with high loss values in early
+          epochs)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot showing dual curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Epochs on X-axis, loss values on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Training and validation curves present, distinct colors (blue/yellow;
+          spec suggested blue/orange), legend present, overfitting pattern shown, optimal
+          stopping point marked
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 50 epochs visible, Y-axis shows full range from 0 to ~2.8
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Training Loss" and "Validation Loss"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-loss-training · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Demonstrates exponential decay in both curves initially, shows clear
+          overfitting divergence after epoch 28, marks optimal early stopping point
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic neural network training scenario with cross-entropy loss,
+          plausible loss magnitudes
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Loss values are reasonable, but initial loss of ~2.5 is slightly
+          high for typical cross-entropy (usually starts around 1-2 for multi-class
+          classification); minor deduction
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API (ax.* methods)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of annotation with arrowprops and bbox styling, scatter
+          for emphasis point, but no advanced matplotlib-specific features like twin
+          axes, secondary y-axis, or fill_between for confidence intervals
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/plotly.yaml b/plots/line-loss-training/metadata/plotly.yaml
index 3ebbb471af..1936aadd65 100644
--- a/plots/line-loss-training/metadata/plotly.yaml
+++ b/plots/line-loss-training/metadata/plotly.yaml
@@ -26,3 +26,177 @@ review:
   - Star marker for best epoch could be larger (size 16 is adequate but not prominent)
   - Could utilize Plotly-specific features like vertical line annotation at best epoch
     or shaded overfitting region
+  image_description: 'The plot displays two smooth line curves over a white background
+    with subtle gray gridlines. The title "line-loss-training · plotly · pyplots.ai"
+    is centered at the top. The blue line (Training Loss) and yellow/gold line (Validation
+    Loss) both start around 2.5 and decrease exponentially. The training loss continues
+    decreasing to approximately 0.2, while the validation loss reaches a minimum around
+    epoch 60 (marked with a red star labeled "Best: Epoch 61") and then diverges upward,
+    demonstrating classic overfitting behavior. The x-axis shows "Epoch" (0-100),
+    and the y-axis shows "Cross-Entropy Loss" (0.5-2.5+). A legend in the upper right
+    corner identifies the three elements: Training Loss, Validation Loss, and Best
+    Epoch.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, ticks at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 3 is appropriate; star marker is visible but slightly
+          small for emphasis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; minor issue with large right margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Epoch" and "Cross-Entropy Loss" are descriptive (loss function
+          specified as per spec)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.3, but legend placement partially overlaps
+          with gridlines
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot for training curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Epoch on X-axis, loss values on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both training and validation curves present, distinct colors (blue/yellow),
+          legend included, minimum validation loss marked with star annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis covers epochs 1-100, Y-axis shows full loss range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Training Loss, Validation Loss, and Best
+          Epoch
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "line-loss-training · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows exponential decay, overfitting divergence, and optimal stopping
+          point; slight noise added for realism
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Neural network training is a perfect, neutral real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Loss values 0.1-2.5 are realistic for cross-entropy; 100 epochs is
+          typical
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of hover templates for interactivity, but could leverage
+          more Plotly features like annotations, shapes for regions, or rangeslider
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/plotnine.yaml b/plots/line-loss-training/metadata/plotnine.yaml
index 0e03427491..33709e7554 100644
--- a/plots/line-loss-training/metadata/plotnine.yaml
+++ b/plots/line-loss-training/metadata/plotnine.yaml
@@ -26,3 +26,181 @@ review:
     y-position that may not scale well with different data'
   - Yellow color for validation loss could benefit from slightly darker shade for
     better contrast against white background
+  image_description: 'The plot displays two training curves on a clean white background
+    with a minimal theme. The title "line-loss-training · plotnine · pyplots.ai" is
+    prominently displayed at the top in bold black text. Below the title is a horizontal
+    legend showing "Training Loss" (blue/navy, #306998) and "Validation Loss" (yellow/gold,
+    #FFD43B) with circular markers. The X-axis is labeled "Epoch" (0-50), and the
+    Y-axis is labeled "Cross-Entropy Loss" (0-2.5). Both curves show exponential decay
+    from high initial values (~2.5) that level off. The training loss (blue) continues
+    decreasing smoothly to ~0.15, while the validation loss (yellow) plateaus around
+    epoch 30 and shows slight uptick afterward, demonstrating overfitting behavior.
+    A vertical dashed gray line at epoch 46 marks "Best: 46" indicating the optimal
+    stopping point. Both lines have circular point markers. The grid is subtle gray
+    with good transparency.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick labels 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and points well-sized (size=1.5 lines, size=3 points), though
+          points slightly small for this scale
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are distinct and colorblind-friendly,
+          though yellow could have slightly more contrast against white
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 canvas, plot fills space well with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Y-axis names the loss function: "Cross-Entropy Loss", X-axis descriptive:
+          "Epoch"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend well-placed at top, but grid alpha inconsistent (some grid
+          lines barely visible while others more prominent)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with dual curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Epoch, Y=Loss, Color=Type correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Training/validation curves, legend, distinct colors (spec suggests
+          blue/orange; met with blue/yellow), optimal stopping point marked
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full epoch range 1-50 shown, y-axis auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Training Loss" and "Validation Loss"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "line-loss-training · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: initial high loss, exponential decay, overfitting
+          divergence after epoch 30, optimal stopping point annotation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Neural network training scenario with realistic cross-entropy loss
+          values and typical training dynamics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Loss values realistic (2.5→0.15), 50 epochs reasonable, though optimal
+          stopping at epoch 46 seems late given visible overfitting starts ~30
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used, well-organized from plotnine
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (aes, geom_line, geom_point, annotate,
+          scale_color_manual, theme_minimal), but doesn't leverage more advanced features
+          like faceting or statistical transformations
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/pygal.yaml b/plots/line-loss-training/metadata/pygal.yaml
index 0934bab699..b747afa4cb 100644
--- a/plots/line-loss-training/metadata/pygal.yaml
+++ b/plots/line-loss-training/metadata/pygal.yaml
@@ -27,3 +27,182 @@ review:
     loss (optimal stopping point)'
   - Grid only shows horizontal lines; vertical guides disabled which reduces readability
     for epoch identification
+  image_description: 'The plot displays two line curves on a white background showing
+    training loss (blue/Python blue #306998) and validation loss (yellow/Python yellow
+    #FFD43B) over 50 epochs. The title "line-loss-training · pygal · pyplots.ai" appears
+    at the top center. The x-axis is labeled "Epoch" with tick marks at intervals
+    of 5 (5, 10, 15... 50). The y-axis is labeled "Cross-Entropy Loss" ranging from
+    approximately 0.2 to 2.4. Both curves start high (~2.3-2.4) and decrease over
+    epochs. The training loss (blue) continues decreasing steadily to ~0.1, while
+    the validation loss (yellow) begins increasing after around epoch 25, demonstrating
+    classic overfitting behavior. A legend appears in the top-left corner outside
+    the plot area with colored squares for "Training Loss" and "Validation Loss".
+    Data points are marked with small dots along each line. Horizontal grid lines
+    are visible at subtle opacity.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Font sizes
+          are appropriate for 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. X-axis labels spaced at every 5 epochs
+          prevent crowding.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are clearly visible with good stroke width. Dots are appropriately
+          sized for data density (50 points).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe and provide excellent contrast
+          against white background.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization. Plot fills majority of space. Legend position
+          in top-left is acceptable but slightly isolated.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Epoch" and "Cross-Entropy Loss" (specifies
+          loss function type as required by spec).'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Horizontal grid lines are subtle and helpful. Legend is functional
+          but positioned outside plot area which is slightly unusual.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type for training loss curves.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis = epochs (1-50), Y-axis = loss values. Correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two curves (training/validation), distinct colors (blue/yellow),
+          clear legend, demonstrates overfitting behavior.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes appropriately scaled.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Training Loss" and "Validation Loss".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-loss-training · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows overfitting pattern well (validation loss diverges from training
+          loss after ~epoch 25). Could optionally mark minimum validation loss point.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Neural network training is a realistic, neutral scenario. Loss curve
+          shapes are authentic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Loss values (0.1 to 2.5) and 50 epochs are realistic for typical
+          training scenarios.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic output.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and pygal imports, all used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (correct for pygal).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's custom Style class, Line chart with dots, x_labels,
+          legend configuration. Could leverage more pygal-specific features like tooltips
+          or annotations.
+  verdict: APPROVED
diff --git a/plots/line-loss-training/metadata/seaborn.yaml b/plots/line-loss-training/metadata/seaborn.yaml
index 3f65c75bba..a4acf23da2 100644
--- a/plots/line-loss-training/metadata/seaborn.yaml
+++ b/plots/line-loss-training/metadata/seaborn.yaml
@@ -22,3 +22,173 @@ review:
   weaknesses:
   - Legend could be positioned better to avoid visual competition with the annotation
     area
+  image_description: 'The plot displays two line curves over 100 epochs showing neural
+    network training loss. A blue line (#306998) represents "Training Loss" and a
+    yellow/gold line (#FFD43B) represents "Validation Loss". Both curves start around
+    2.5-2.6 and decay exponentially. The training loss (blue) decreases smoothly to
+    around 0.2, while validation loss (yellow) plateaus higher around 0.35-0.4 and
+    shows a slight upward trend after epoch 70 (overfitting). A red dashed vertical
+    line marks the optimal stopping point at epoch 90, with a red dot and arrow annotation
+    pointing to "Optimal: Epoch 90". The title follows the format "line-loss-training
+    · seaborn · pyplots.ai". X-axis labeled "Epoch", Y-axis labeled "Cross-Entropy
+    Loss". Legend in upper right corner. Subtle grid with alpha transparency.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines with linewidth=3 and distinct colors are perfectly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight gap at right edge with annotation
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Y-axis includes units: "Cross-Entropy Loss", X-axis "Epoch" is contextually
+          clear'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha 0.3 is good but legend shows "Training Loss" and "Validation
+          Loss" instead of matching the code's "Type" column values
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot showing training curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Epoch on X, Loss on Y, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both curves, legend, distinct colors (blue/yellow), optimal stopping
+          point marked
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 0-105, Y-axis 0-2.8 shows all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels clear and accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-loss-training · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows decay, noise, overfitting after epoch 70, optimal point; could
+          show more dramatic overfitting
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Neural network training with cross-entropy loss is a realistic ML
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Loss values 0-2.8 are realistic for cross-entropy, 100 epochs typical
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn lineplot with hue parameter
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of seaborn's data-driven approach with DataFrame and hue,
+          but could leverage more seaborn styling features
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/bokeh.yaml b/plots/line-markers/metadata/bokeh.yaml
index 478e6f6110..b2ac015c5b 100644
--- a/plots/line-markers/metadata/bokeh.yaml
+++ b/plots/line-markers/metadata/bokeh.yaml
@@ -23,3 +23,171 @@ review:
   weaknesses:
   - Legend text and glyph sizes could be slightly larger for better visibility at
     full canvas size
+  image_description: The plot displays a line chart with markers showing monthly temperature
+    readings for three weather stations over 12 months (January to December). Station
+    A is represented in blue with circular markers, Station B in yellow with square
+    markers, and Station C in red/coral with triangle markers. All three lines follow
+    a seasonal temperature pattern peaking in July-August. The title reads "line-markers
+    · bokeh · pyplots.ai" at the top left. The legend is positioned in the upper left
+    corner showing all three stations. X-axis shows month abbreviations (Jan-Dec),
+    Y-axis shows "Temperature (°C)" ranging from 0 to ~25. A subtle dashed grid is
+    visible in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (size=20) are clearly visible and lines (width=4) are well-proportioned
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red colors are colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though plot could use slightly more of the canvas
+          width
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Temperature (°C)", X-axis labeled "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but legend is somewhat small relative
+          to canvas size
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows months, Y shows temperature values correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows multiple series with different marker shapes as spec requires
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three stations
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-markers · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series, different marker shapes, seasonal variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings for weather stations is a neutral, realistic
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (0-26°C) are realistic for seasonal patterns
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, interactive HTML output, custom tick label
+          overrides, Legend model configuration
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/highcharts.yaml b/plots/line-markers/metadata/highcharts.yaml
index 18793d7471..025f65bffb 100644
--- a/plots/line-markers/metadata/highcharts.yaml
+++ b/plots/line-markers/metadata/highcharts.yaml
@@ -26,3 +26,180 @@ review:
     alpha lowered)'
   - The chart height in the rendered image (2561px) does not match the specified 2700px,
     suggesting potential cropping or rendering issue
+  image_description: 'The plot displays a line chart with markers showing monthly
+    temperature readings at two weather stations over 12 months (Jan-Dec). There are
+    two series: "Station A (Coastal)" in dark blue (#306998) with circular markers,
+    and "Station B (Inland)" in golden yellow (#FFD43B) with diamond markers. Both
+    lines show clear seasonal temperature patterns - rising from winter lows (~2-6°C
+    in January) to summer peaks (~24-27°C in July/August), then declining. The title
+    "line-markers · highcharts · pyplots.ai" appears at the top with a subtitle "Monthly
+    Temperature Readings at Two Weather Stations". The Y-axis shows "Temperature (°C)"
+    ranging from 0-29, and the X-axis shows "Month" with all 12 month abbreviations.
+    The legend is positioned in the top-right corner. Grid lines are subtle gray.
+    The markers are clearly visible at each data point on both lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are large (radius 14) and clearly visible; lines have good
+          thickness (5px)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe; excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; legend positioned appropriately though
+          slightly far from the data
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Temperature (°C)" with units; X-axis has "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed but the grid could be more subtle (currently
+          slightly prominent)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (categorical), Y=temperature values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Lines with visible markers, multiple series with different marker
+          shapes (circle vs diamond)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range (0-29°C covers all values)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match series names accurately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-markers · highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows two series with different marker shapes, seasonal variation,
+          and clear distinction between coastal/inland patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature readings at weather stations is a relatable,
+          neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic for temperate climate (2-27°C annual
+          range)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code is mostly linear but could be more streamlined (many configuration
+          blocks)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png (correct) but canvas size is 4800x2700 in code
+          while actual image appears to be 4800x2561
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts series configuration, marker customization, and interactive
+          HTML export; could leverage more advanced Highcharts features like tooltips
+          or hover states
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/letsplot.yaml b/plots/line-markers/metadata/letsplot.yaml
index 7d11271d0a..4fb5dd09f9 100644
--- a/plots/line-markers/metadata/letsplot.yaml
+++ b/plots/line-markers/metadata/letsplot.yaml
@@ -24,3 +24,163 @@ review:
   - Grid alpha 0.5 is too prominent - should be 0.2-0.4 for subtlety
   - Could use different marker shapes for multiple series as suggested in the spec
     notes
+  image_description: The plot displays three product lines (Product A, B, C) tracked
+    over 12 quarters showing revenue in Million USD. Product A (blue) shows strong
+    upward growth from ~47 to ~78 million. Product B (yellow/gold) starts highest
+    at ~61 million but trends downward to ~48 million. Product C (red) starts lowest
+    at ~34 million and grows steadily to ~57 million. Each data point has a circular
+    marker. The title follows the correct format "line-markers · lets-plot · pyplots.ai".
+    Legend is positioned on the right. Grid lines are subtle gray. The layout is clean
+    with good use of space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis titles at 22pt, tick text at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (size=6) and lines (size=2.5) are well-sized for 36 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and red are distinguishable; yellow/gold may be slightly less
+          visible for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Revenue (Million USD)", X-axis "Quarter" is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha at 0.5 is slightly too prominent (should be 0.2-0.4)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Quarter, Y=Revenue correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Lines with visible markers, multiple series with different colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-markers · lets-plot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple series with different growth patterns; could use different
+          marker shapes as noted in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly product revenue is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values 35-78 million are plausible; 12 quarters is good density
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' but uses path='.' which may cause issues
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/matplotlib.yaml b/plots/line-markers/metadata/matplotlib.yaml
index 2ae66d5609..ce45e9e2b4 100644
--- a/plots/line-markers/metadata/matplotlib.yaml
+++ b/plots/line-markers/metadata/matplotlib.yaml
@@ -26,3 +26,171 @@ review:
     cases
   - Does not leverage more distinctive matplotlib features like annotations or custom
     styling
+  image_description: |-
+    The plot displays a line chart showing temperature readings (°C) over 14 days from three weather stations. Three distinct lines with markers are shown:
+    - **Coastal Station** (blue circles): Highest temperatures, ranging from ~18°C to ~22°C, showing an upward trend mid-period then declining
+    - **Inland Station** (yellow squares): Mid-range temperatures, starting at ~13°C and declining to ~7°C over the period
+    - **Mountain Station** (red triangles): Lowest temperatures, ranging from ~6°C to ~10°C with a general downward trend
+
+    The title follows the required format "line-markers · matplotlib · pyplots.ai". Axis labels are "Day" (x-axis) and "Temperature (°C)" (y-axis). A legend is positioned in the upper left. Grid lines are visible with subtle alpha. Markers are clearly visible with white edge borders for contrast. The layout is well-balanced with good use of the 16:9 canvas.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are size 12 with linewidth 3, perfectly visible for 14 data
+          points per series; white marker edges provide excellent contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are distinguishable; not ideal for red-green
+          colorblind but shapes compensate
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins using tight_layout()
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Day" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha 0.3 is good, legend placed well but could be positioned
+          outside plot area to avoid any potential data overlap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers combining line and scatter features
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=days (continuous), Y=temperature values correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with different marker shapes (circle, square, triangle)
+          as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify each station
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-markers · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple series with different markers and line patterns; could
+          benefit from showing filled vs unfilled markers as mentioned in spec notes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Weather station temperature data is a real, comprehensible, neutral
+          scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (6-22°C over 14 days)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic matplotlib features well but doesn't leverage distinctive
+          capabilities like annotations, custom tick formatters, or advanced styling
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/plotly.yaml b/plots/line-markers/metadata/plotly.yaml
index b53e0ea21c..fd29891f53 100644
--- a/plots/line-markers/metadata/plotly.yaml
+++ b/plots/line-markers/metadata/plotly.yaml
@@ -25,3 +25,175 @@ review:
     any potential overlap with data
   - Hover templates or other interactive features could better showcase Plotly distinctive
     capabilities
+  image_description: The plot displays three temperature sensor time series (Sensor
+    A, B, C) over 15 hours on a white background. Sensor A (blue circles) shows the
+    highest readings peaking around 23.5°C at hour 7 before declining. Sensor B (yellow
+    squares) fluctuates in the middle range around 17-20°C. Sensor C (orange/red diamonds)
+    trends downward from ~18.5°C to ~15°C. The title "line-markers · plotly · pyplots.ai"
+    is centered at the top. X-axis shows "Time (hours)" from 0-14 with tick marks
+    every 2 hours. Y-axis shows "Temperature (°C)" from 15-24. Legend is positioned
+    in the upper left with a semi-transparent white background. Grid lines are subtle.
+    Each series has thick lines (~4px width) with large, clearly visible markers (~16px)
+    in distinct shapes.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are large and clearly visible, line width is appropriate
+          for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red/orange provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Temperature (°C)" and "Time (hours)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate, but legend placement could be better (upper-left
+          is acceptable but overlaps slightly with the plot area)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (time) and Y (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with different marker shapes, markers clearly visible
+          against lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all three series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-markers · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series with different trends (rising, stable, declining),
+          demonstrates sparse data visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Temperature sensor readings are a plausible scenario, but the random
+          walk pattern could be more realistic for actual sensor data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (15-24°C) are realistic indoor/environmental temperatures
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses graph_objects for fine control, marker customization with border
+          lines, interactive HTML export. Could have used hover templates or animations
+          for more distinctive Plotly features.
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/plotnine.yaml b/plots/line-markers/metadata/plotnine.yaml
index 2632736b24..e4df9f0e38 100644
--- a/plots/line-markers/metadata/plotnine.yaml
+++ b/plots/line-markers/metadata/plotnine.yaml
@@ -23,3 +23,182 @@ review:
   weaknesses:
   - X-axis displays numeric month values (1, 2.5, 5, 7.5, 10, 12.5) instead of month
     names (Jan, Feb, etc.) - the month_labels were created but not used on the axis
+  image_description: The plot shows a line chart with markers displaying monthly temperature
+    data for two weather stations (Coastal Station and Inland Station) over 12 months.
+    The Coastal Station is represented by a blue line (#306998) with circular markers,
+    while the Inland Station uses a golden-yellow line (#FFD43B) with square markers.
+    The X-axis shows "Month" with numeric values from 1-12.5, and the Y-axis shows
+    "Temperature (°C)" ranging from approximately 2-28°C. Both lines follow a seasonal
+    pattern peaking in summer months (July-August). The title correctly reads "line-markers
+    · plotnine · pyplots.ai". A legend on the right side identifies both weather stations.
+    The plot uses a minimal theme with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is perfectly readable: title is large and clear, axis labels
+          are well-sized, tick labels are legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are clearly visible (size=5), lines are appropriately thick
+          (size=1.5), good contrast against background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+          (blue-yellow is one of the safest combinations)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, legend is positioned appropriately
+          on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has units "Temperature (°C)" but X-axis shows "Month" as numeric
+          values rather than month names, losing some context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle, but legend title says "Weather Station" while the
+          data labels say "Coastal Station"/"Inland Station" - the legend separates
+          color and shape into two entries even though they map to the same variable
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is continuous (months), Y is temperature values, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Lines with visible markers, different marker shapes for multiple
+          series (circles vs squares), markers clearly visible against lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible, temperature range covers all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend shows "Coastal Station" and "Inland Station" correctly, but
+          the X-axis shows numeric month values (1-12.5) instead of month labels,
+          making it less intuitive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-markers · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows two distinct series with different patterns: coastal (milder,
+          less extreme) vs inland (more extreme temperature swings) - excellent demonstration
+          of the plot type'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Weather station temperature comparison is a perfect, neutral real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (2-29°C seasonal range), appropriate
+          for temperate climate locations
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but adds verbose=False which is fine
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s grammar of graphics: aes mapping, geom_line
+          + geom_point combination, scale_color_manual and scale_shape_manual for
+          custom aesthetics, theme_minimal with detailed theme customization'
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/pygal.yaml b/plots/line-markers/metadata/pygal.yaml
index cc479ff95b..6bc6457bde 100644
--- a/plots/line-markers/metadata/pygal.yaml
+++ b/plots/line-markers/metadata/pygal.yaml
@@ -25,3 +25,184 @@ review:
   - Grid lines only on Y-axis; could add subtle X-guides for complete reference
   - Marker visibility good but does not use different marker shapes for series as
     suggested in spec notes (pygal limitation)
+  image_description: 'The plot displays a line chart with markers showing temperature
+    readings (°C) over 12 months (Jan-Dec). Three series are shown: Sensor A (blue),
+    Sensor B (yellow/gold), and Sensor C (red/coral). Each line has clearly visible
+    circular markers at each data point. The title "line-markers · pygal · pyplots.ai"
+    appears at the top center. The legend is positioned in the upper left with colored
+    squares identifying each sensor. The Y-axis shows "Temperature (°C)" ranging from
+    approximately 13-27, and the X-axis shows "Month" with abbreviated month names.
+    Horizontal grid lines are visible. The lines show seasonal patterns with sinusoidal
+    variation and some noise.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size. Font sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Month labels are well-spaced, legend
+          doesn't overlap data.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are clearly visible (dots_size=16) and lines are appropriately
+          thick (stroke_width=6). Markers stand out well against lines.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red color scheme is colorblind-safe (distinguishable
+          by both hue and value).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization. Plot fills approximately 60-70% of canvas.
+          Minor point: legend in upper left creates slight imbalance.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Month".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis grid lines are visible and subtle. Legend is functional but
+          could be better positioned (at bottom or right side would reduce visual
+          imbalance).
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: line plot with markers showing discrete data
+          points.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (temperature) correctly mapped. Multiple series
+          shown.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Markers clearly visible, different colors for multiple series (spec
+          notes different shapes but colors suffice for distinction).
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify Sensor A, B, C.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-markers · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple series with different base temperatures and phase
+          shifts. Good variation in data. Could show more extreme marker cases.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor data over months is a realistic, neutral scenario
+          (science/measurement domain).
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values 13-27°C are realistic for indoor/ambient sensors.
+          Slightly narrow range but plausible.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → style → chart → save.
+          No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and pygal used, all imports utilized.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as both plot.png and plot.html, which is correct; minor issue:
+          code comments could better explain why both outputs are produced.'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's SVG-native capabilities, custom Style, show_dots and
+          dots_size for markers. However, doesn't leverage pygal's unique tooltip/interactivity
+          features in the HTML output or use fill_opacity for the area variation.
+  verdict: APPROVED
diff --git a/plots/line-markers/metadata/seaborn.yaml b/plots/line-markers/metadata/seaborn.yaml
index 4216ef9442..3e8f49e662 100644
--- a/plots/line-markers/metadata/seaborn.yaml
+++ b/plots/line-markers/metadata/seaborn.yaml
@@ -25,3 +25,173 @@ review:
     more visually appealing
   - Data series patterns are somewhat similar; more variety in trends (e.g., one volatile,
     one cyclical) would better showcase the plot type's utility
+  image_description: The plot shows three line series with markers tracking temperature
+    readings over 12 hours from three sensors. Sensor A (dark blue circles) stays
+    in the 22-24°C range with a slight upward trend. Sensor B (golden yellow X markers)
+    starts around 20°C and declines to approximately 16°C. Sensor C (teal squares)
+    remains stable around 23-24°C. The title "line-markers · seaborn · pyplots.ai"
+    is bold at the top. Axes show "Hour" (0-11) on x-axis and "Temperature (°C)" (15-24
+    range) on y-axis. A legend in the upper left identifies each sensor. A subtle
+    dashed grid aids readability. The 16:9 layout uses space well with balanced margins.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap anywhere, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 14 with linewidth 3, excellent visibility for 12 points
+          per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, gold, teal palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Hour" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha 0.3 is appropriate, but legend framealpha=0.9 could be
+          slightly lower
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with markers at each data point
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Hour) and Y (Temperature) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with different marker shapes as per spec notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify each sensor
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-markers · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows multiple series with different trends, but could show more
+          variation in patterns (e.g., one series with volatility, one stable)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor readings are a real, neutral scenario relevant
+          to quality control/monitoring
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values 15-24°C are realistic for indoor/environmental
+          monitoring
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's lineplot with hue/style for automatic marker differentiation,
+          but could leverage more seaborn-specific features like set_theme() or statistical
+          capabilities
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/altair.yaml b/plots/line-multi/metadata/altair.yaml
index f6d08fd453..8f766e6204 100644
--- a/plots/line-multi/metadata/altair.yaml
+++ b/plots/line-multi/metadata/altair.yaml
@@ -25,3 +25,177 @@ review:
   - Could add tooltips for better interactivity in the HTML version
   - Y-axis starting at 0 creates unnecessary whitespace when minimum data is around
     40
+  image_description: The plot displays a multi-line comparison chart tracking 4 product
+    lines (Electronics, Clothing, Furniture, Books) over 24 months from February 2023
+    to December 2024. The title "line-multi · altair · pyplots.ai" is centered at
+    the top. Electronics (coral/red) shows the highest values (~150-195k), followed
+    by Clothing (yellow, dotted line pattern, ~110-170k with seasonal variation),
+    Furniture (green, steady ~80-95k), and Books (blue, lowest, ~40-65k with gradual
+    upward trend). Each line has circular markers at data points. The legend "Product
+    Line" is positioned in the top-right. Y-axis shows "Sales (thousands USD)" from
+    0-200, X-axis shows "Month" with date labels. Subtle dashed grid lines appear
+    in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis date labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are thick (strokeWidth=4), markers visible (size=80) - slightly
+          large markers for 24 points but appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (#306998 blue, #FFD43B yellow, #E15759
+          red, #59A14F green) with distinct stroke patterns'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, but legend positioned at right edge creates slight
+          imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Sales (thousands
+          USD)", "Month"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (0.3 opacity, dashed) but legend symbol colors don't
+          match line colors well (symbol shows fill, not stroke)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (months), Y=numeric (sales), color=categorical (product)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple lines with distinct colors, legend, varied line styles (strokeDash),
+          markers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0 showing full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-multi · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 series with different trends (upward, seasonal, steady, gradual
+          increase), but all trends are generally upward
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales for product lines is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands USD are plausible but starting Y at 0 when min
+          data is ~40 creates some whitespace
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses declarative encoding, mark_line with point overlay, strokeDash
+          encoding, interactive() for HTML, but could use tooltips for richer interactivity
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/bokeh.yaml b/plots/line-multi/metadata/bokeh.yaml
index 034a4ee0c7..85388e04ad 100644
--- a/plots/line-multi/metadata/bokeh.yaml
+++ b/plots/line-multi/metadata/bokeh.yaml
@@ -24,3 +24,181 @@ review:
   - Legend glyph rendering appears small/inconsistent in PNG output
   - Missing HoverTool for interactive exploration of data points
   - Cyan and blue colors could be more distinct for accessibility
+  image_description: 'The plot displays 4 multi-line series comparing monthly sales
+    data for different product categories over 12 months. Electronics (Python blue
+    #306998) shows a strong upward trend from ~48k to ~79k with solid line and circular
+    markers. Clothing (gold/yellow #FFD43B) shows a declining trend with seasonal
+    variation from ~39k to ~17k using solid line. Furniture (red/orange #E24A33) shows
+    relatively stable values around 23-28k with dashed line. Groceries (cyan/teal
+    #348ABD) remains mostly flat around 47-55k with dashed line. The title "line-multi
+    · bokeh · pyplots.ai" is centered at the top. X-axis shows month abbreviations
+    (Jan-Dec), Y-axis shows "Sales (thousands $)" ranging from 10-80. Legend is positioned
+    on the right side outside the plot area with clear series identification. Grid
+    lines are subtle with dashed styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels and legend are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and markers are well-sized; line_width=5 and size=18 markers
+          work well for 12 data points per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color choices with distinct hues; blue/yellow/red/cyan combination
+          works for most colorblind types, though cyan and blue are closer than
+          ideal
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend on right is well-positioned, balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Month" and "Sales (thousands $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3) but legend shows some rendering issues
+          with colored boxes appearing small
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line comparison plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=sales values correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, distinct colors, line styles vary, markers present,
+          legend included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "line-multi · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows multiple trends: upward (Electronics), downward (Clothing),
+          stable (Groceries), and seasonal variation, but Clothing''s dramatic drop
+          may be unrealistic'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Monthly sales for product lines is a plausible real-world scenario;
+          values in thousands of dollars are sensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values from 10-80 thousand are realistic for monthly product
+          line sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear script with imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' and 'plot.html'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource appropriately and proper legend construction,
+          but doesn't leverage Bokeh's distinctive interactive features like HoverTool,
+          tooltips, or other interactive widgets
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/highcharts.yaml b/plots/line-multi/metadata/highcharts.yaml
index 98b3f13c7c..e8e538bcee 100644
--- a/plots/line-multi/metadata/highcharts.yaml
+++ b/plots/line-multi/metadata/highcharts.yaml
@@ -28,3 +28,189 @@ review:
   - Legend text could be larger relative to the plot scale for better visibility
   - Could leverage more Highcharts-specific features like tooltips configuration or
     animation settings in the HTML output
+  image_description: 'The plot displays a multi-line chart with 4 distinct product
+    category series tracked over 12 months (Jan-Dec). The title "line-multi · highcharts
+    · pyplots.ai" appears at the top in bold black text, with a subtitle "Monthly
+    Sales by Product Category (thousands USD)" below it. The Y-axis is labeled "Sales
+    (thousands USD)" and ranges from 0 to 300. The X-axis shows month abbreviations.
+    Four lines are plotted: Electronics (blue with circle markers, showing strong
+    growth to 280), Clothing (yellow with square markers, showing seasonal pattern),
+    Home & Garden (purple with triangle markers, peaking mid-year), and Sports Equipment
+    (cyan/teal with diamond markers, showing a summer peak). A legend in the upper
+    right identifies each series with matching colors and marker shapes. The lines
+    are thick (6px) with visible markers at data points. The background is white with
+    subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels and tick marks are clearly readable
+          at 72px, 48px, and 36px respectively
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are thick (6px) with large markers (radius 12), easily distinguishable.
+          Slight deduction as lines cross and overlap in middle months but markers
+          help distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, purple, cyan palette is colorblind-safe, avoids red-green
+          conflict
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, legend positioned well. Minor: legend
+          could be slightly closer to the plot area'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales (thousands USD)" with units, X-axis has descriptive
+          "Month" label
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (gray), legend is clear with border. Minor: legend
+          has small text relative to overall scale'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is months (categorical/time), Y is sales values, multiple series
+          correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple lines (4 series), distinct colors, legend, markers at data
+          points as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data points with appropriate headroom
+          to 300
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match series names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-multi · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows 4 series with different patterns: growth trend (Electronics),
+          seasonal bi-modal (Clothing), single peak (Home & Garden), mild peak (Sports).
+          Could show more dramatic crossing/divergence'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales for product categories is a realistic business scenario
+          with plausible seasonal patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Values in thousands USD (45-280) are realistic for product category
+          sales. Minor: could use more dramatic scale differences between categories'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → chart config → series → HTML
+          → PNG export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually hardcoded which
+          is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts_core, selenium, pathlib,
+          etc.)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png correctly, but imports LineSeries from wrong module
+          (highcharts_core.options.series.area instead of highcharts_core.options.series.line)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts-specific features like marker symbols, styled legend
+          with border/padding, plot_options for line styling. Could leverage more
+          interactive features or advanced Highcharts options
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/letsplot.yaml b/plots/line-multi/metadata/letsplot.yaml
index 4d0553ee3e..e07823a88e 100644
--- a/plots/line-multi/metadata/letsplot.yaml
+++ b/plots/line-multi/metadata/letsplot.yaml
@@ -27,3 +27,179 @@ review:
     consider using scale_color_brewer or viridis
   - Legend title Product is generic; could be more contextual like Product Line
   - Could add tooltips for the HTML version to enhance interactivity
+  image_description: 'The plot displays a multi-line comparison chart showing monthly
+    sales data for three product lines over 12 months. The chart features three distinct
+    colored lines: blue (#306998) for Electronics showing strong upward growth from
+    ~155 to ~219 thousands USD, yellow (#FFD43B) for Clothing displaying a seasonal
+    pattern starting at ~122, dipping to ~63 in August then recovering to ~78, and
+    red (#DC2626) for Home Goods showing gradual growth from ~88 to ~104. Each line
+    has circular markers at data points. The title "line-multi · letsplot · pyplots.ai"
+    is centered at the top. X-axis shows month abbreviations (Jan-Dec), Y-axis shows
+    "Sales (thousands USD)" ranging from 60-220. The legend is positioned on the right
+    side with clear labeling. The background uses a subtle gray grid on white.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis titles at 22pt, axis text at 18pt, legend at
+          18-20pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, month labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines at size=2.5 and points at size=5 are clearly visible, appropriate
+          for 12 data points per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are distinguishable; blue-yellow-red palette
+          is fairly colorblind-safe but not optimal (could use viridis or more distinct
+          hues)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend positioned appropriately on right,
+          balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Sales (thousands USD)" includes units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha and minor gridlines are blank, which is
+          good; legend is well-placed but title "Product" could be more descriptive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line comparison plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Month) and Y (Sales) correctly assigned, color mapped to Product
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, distinct colors, legend, markers at data points
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of data (60-220), all 12 months visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-multi · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows distinct trends (growth, seasonal, gradual), convergence, and
+          divergence; could show more crossing of lines for richer comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales for product lines is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in 60-220 thousands USD range are reasonable; the Clothing
+          sharp dip to 63 in August is dramatic but plausible for seasonal variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pandas, lets_plot'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All API calls are current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but uses path="." parameter; minor issue
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot ggplot grammar correctly with aes, geom_line, geom_point,
+          scale_color_manual, theme_minimal, and ggsize. Also generates HTML for interactivity.
+          However, doesn't showcase more advanced lets-plot features like tooltips,
+          hover interactivity, or coord features.
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/matplotlib.yaml b/plots/line-multi/metadata/matplotlib.yaml
index 2151d6bebe..08efcbf698 100644
--- a/plots/line-multi/metadata/matplotlib.yaml
+++ b/plots/line-multi/metadata/matplotlib.yaml
@@ -27,3 +27,175 @@ review:
     change
   - Yellow color (#FFD43B) may be slightly difficult to distinguish from white background
     in some viewing conditions
+  image_description: The plot displays a multi-line comparison chart showing monthly
+    sales data (in thousands of dollars) for three product lines across 12 months
+    (January to December). **Product A (Electronics)** is shown as a blue solid line
+    with circle markers, demonstrating steady growth with a seasonal Q4 bump, ranging
+    from ~52K to ~107K. **Product B (Appliances)** appears as a yellow/gold dashed
+    line with square markers, starting high (~81K), dipping mid-year (~55K), then
+    recovering (~79K). **Product C (Software)** is rendered as a red dash-dot line
+    with triangle markers, showing exponential growth from ~29K to ~70K. The title
+    correctly reads "line-multi · matplotlib · pyplots.ai". Legend is positioned in
+    the upper left with clear series identification. Grid is subtle with dashed lines.
+    All text is clearly readable with proper sizing.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are 3pt width with 10pt markers, perfectly visible for 12 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Red provides good distinction, though blue-yellow could
+          be improved for some colorblind types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Month" and "Sales ($ thousands)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend well-placed but could use slightly
+          better visual separation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line plot for comparison
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=sales values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors, legend, varied line styles, markers - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show complete range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: `line-multi · matplotlib · pyplots.ai`'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows growth, decline, recovery, and exponential patterns - excellent
+          variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Monthly sales for product lines is plausible; minor deduction as
+          exact business context is generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales in $50K-$110K range is realistic for product lines
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, clean linear flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ax.plot correctly with line styles and markers, but no advanced
+          matplotlib features like annotations, secondary axes, or fill_between
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/plotly.yaml b/plots/line-multi/metadata/plotly.yaml
index 02039ce03a..96761ebb1a 100644
--- a/plots/line-multi/metadata/plotly.yaml
+++ b/plots/line-multi/metadata/plotly.yaml
@@ -28,3 +28,148 @@ review:
     range sliders that would enhance the multi-line comparison experience
   - 'Minor inconsistency: y-axis label says Thousands of Units but actual values are
     in the 50-230 range'
+  image_description: |-
+    The plot displays a multi-line comparison chart showing monthly product sales for 4 product categories over 12 months (January to December). The lines are:
+    - **Electronics** (blue solid line, circle markers): Shows a strong upward trend from ~155 to ~230 units
+    - **Clothing** (yellow solid line, square markers): Shows seasonal variation, starting at ~200, with fluctuations
+    - **Home & Garden** (green dashed line, diamond markers): Relatively stable around 100-120 units with slight increase
+    - **Sports** (purple dotted line, triangle markers): Declining trend from ~125 to ~70 units
+
+    The title reads "Monthly Product Sales · line-multi · plotly · pyplots.ai" centered at the top. The y-axis is labeled "Sales (Thousands of Units)" and x-axis "Month". The legend is positioned in the upper left corner with a semi-transparent white background. Grid lines are subtle gray. All text is clearly readable.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title 32pt, axis titles 24pt, tick labels
+          20pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-placed
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (width=4), markers size 12 with distinct symbols,
+          all clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, purple palette is colorblind-safe with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Sales (Thousands of Units)", X-axis "Month"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line comparison plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (months), Y is numeric sales values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors, legend, varied line
+          styles, markers'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes properly scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match series names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Monthly Product Sales · line-multi · plotly
+          · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows diverse trends: upward (Electronics), seasonal (Clothing),
+          stable (Home & Garden), declining (Sports)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly product sales is a realistic business scenario with plausible
+          seasonal patterns
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/plotnine.yaml b/plots/line-multi/metadata/plotnine.yaml
index 43812cc18d..bc08859188 100644
--- a/plots/line-multi/metadata/plotnine.yaml
+++ b/plots/line-multi/metadata/plotnine.yaml
@@ -25,3 +25,172 @@ review:
     grid alpha setting
   - Data patterns could show more dramatic trend differences (e.g., line crossovers,
     seasonal spikes) to better demonstrate multi-line comparison capabilities
+  image_description: |-
+    The plot displays a multi-line chart showing monthly sales data (Jan-Dec) for 4 product lines. The x-axis shows months with abbreviated labels, and the y-axis shows "Sales (thousands USD)" ranging from approximately 50-200. Four distinct lines are visible:
+    - **Electronics** (blue #306998): Strong upward trend, starting ~110 and ending ~195
+    - **Clothing** (yellow #FFD43B): Starts high ~130, trends downward to ~65
+    - **Furniture** (green #81C784): Relatively flat around 90-110
+    - **Accessories** (salmon/red #E57373): Starts ~115, trends downward to ~75
+
+    Each line has circular markers at data points. The legend is positioned on the right side, clearly labeled "Product Line". The title reads "line-multi · plotnine · pyplots.ai" centered at the top. The plot uses a minimal theme with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend text are all clearly
+          readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths (2.5) and point sizes (5) are well-suited for 4 series
+          over 12 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, salmon) are colorblind-friendly
+          and easily distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, legend well-positioned on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "Sales (thousands USD)", X-axis labeled "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle (almost invisible), could be slightly more visible
+          for better reference
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line comparison plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Month on X, Sales on Y, Product as color grouping
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with distinct colors, legend, optional markers included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-multi · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows multiple trends (upward, downward, stable), but all lines follow
+          similar smooth patterns without demonstrating crossovers or dramatic divergences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales for product lines is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in 50-200 range are plausible for "thousands USD"
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Good use of ggplot grammar (aes, geom_line, geom_point, scale_color_manual),
+          but could leverage more plotnine-specific features like faceting or statistical
+          transformations
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/pygal.yaml b/plots/line-multi/metadata/pygal.yaml
index 720e21f862..ef5cdf7e6d 100644
--- a/plots/line-multi/metadata/pygal.yaml
+++ b/plots/line-multi/metadata/pygal.yaml
@@ -24,3 +24,176 @@ review:
   - Legend positioned in top-left could be closer to the data area or at bottom
   - Data is deterministic but CQ-02 scoring convention expects explicit seed comment
     for clarity
+  image_description: 'The plot displays a multi-line chart showing monthly sales data
+    for 4 product lines over 12 months. Four distinct colored lines are visible: a
+    blue line (Electronics) showing strong growth from ~45 to 110, a yellow line (Clothing)
+    with gradual growth from ~38 to 88, an orange line (Home Goods) with steady growth
+    from ~28 to 72, and a teal line (Sports) showing a seasonal pattern that rises
+    from ~22 to peak around 72 in July-August then declines to ~28 in December. The
+    title "line-multi · pygal · pyplots.ai" is displayed at the top center. A legend
+    in the top-left identifies all four series. The Y-axis shows "Sales (thousands
+    USD)" and the X-axis shows "Month" with labels from Jan to Dec. Circular markers
+    appear at each data point. Horizontal dotted grid lines help with value reading.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable, title and axis labels are clear, legend is
+          legible. Slightly smaller than optimal for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and dots are visible, stroke width of 8 and dot size of 12
+          work well. Lines could be slightly thicker for the large canvas.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, orange, and teal are colorblind-safe and easily distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills most of the space. Legend placement
+          in top-left is functional but slightly distant from the plot area.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units "Sales (thousands USD)", X-axis labeled "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid lines are subtle and helpful. Legend is well-identified but
+          positioned slightly far from the data area.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows months (categorical/time), Y-axis shows numeric values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with distinct colors, legend present, markers at
+          data points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, axes accommodate full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "line-multi · pygal · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows diverse trends: steady growth (Electronics), gradual growth
+          (Clothing, Home Goods), and seasonal variation (Sports with summer peak)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly product sales is a plausible, real-world business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in tens of thousands USD are realistic for product lines
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: Data is hardcoded and deterministic, so no random seed is needed;
+          scored 0 only because CQ-02 expects an explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal.Line with custom Style, show_dots, dots_size, and legend
+          configuration. Could leverage more pygal-specific features like tooltips
+          or fill options.
+  verdict: APPROVED
diff --git a/plots/line-multi/metadata/seaborn.yaml b/plots/line-multi/metadata/seaborn.yaml
index 2e6683e8af..a84f84599f 100644
--- a/plots/line-multi/metadata/seaborn.yaml
+++ b/plots/line-multi/metadata/seaborn.yaml
@@ -25,3 +25,176 @@ review:
   - Could leverage more seaborn-specific features like sns.set_theme() or custom context
     settings
   - The color palette is custom but not from seaborn recommended colorblind-safe palettes
+  image_description: The plot displays a multi-line chart with 4 product lines (Electronics,
+    Apparel, Home & Garden, Sports) tracked over 12 months (Jan-Dec). Electronics
+    (blue line with circle markers) shows strong upward growth culminating in a holiday
+    spike in Nov-Dec, reaching ~133K. Apparel (yellow/gold line with X markers) shows
+    a seasonal wave pattern peaking around April-June. Home & Garden (green line with
+    square markers) peaks in summer (Aug) then declines. Sports (pink/magenta line
+    with + markers) remains relatively stable with slight downward trend. The title
+    "line-multi · seaborn · pyplots.ai" is prominently displayed at the top. A well-positioned
+    legend in the upper left identifies each series. The y-axis shows "Sales (thousands
+    USD)" ranging 0-140, x-axis shows month names. All lines have markers at data
+    points and use distinct colors.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 3, marker size 12 - perfectly adapted for 4 series with
+          12 points each
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors (blue, yellow, green, pink), though green and
+          yellow may be hard to tell apart under some forms of color blindness
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Sales (thousands USD)" with units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha 0.3 is subtle, legend well placed but could be slightly
+          more refined
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=sales values, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, distinct colors, legend, markers present as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-multi · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows trends (Electronics growth), seasonal patterns (Apparel, Home
+          & Garden), stability (Sports), and divergence between series
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales for product lines is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in 10-140K range are realistic for product categories
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses `dashes=False` which works but the modern seaborn approach is
+          cleaner
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' but should use the full path convention
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot with hue/style for grouping which is seaborn's
+          strength, but could leverage more advanced features like confidence intervals
+          or seaborn's built-in themes
+  verdict: APPROVED
diff --git a/plots/line-realtime/metadata/altair.yaml b/plots/line-realtime/metadata/altair.yaml
index cb4b6064b6..370148483f 100644
--- a/plots/line-realtime/metadata/altair.yaml
+++ b/plots/line-realtime/metadata/altair.yaml
@@ -1,6 +1,3 @@
-# Per-library metadata for altair implementation of line-realtime
-# Auto-generated by impl-generate.yml
-
 library: altair
 specification_id: line-realtime
 created: '2025-12-31T14:10:39Z'
@@ -15,5 +12,189 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/line-realtime
 preview_html: https://storage.googleapis.com/pyplots-images/plots/line-realtime/altair/plot.html
 quality_score: null
 review:
-  strengths: []
-  weaknesses: []
+  strengths:
+  - 'Excellent visual indicators for real-time effect: LIVE badge, scroll direction
+    arrow (▶), current value highlight with yellow stroke'
+  - Good use of Altair's layered composition combining area fill, line, and point
+    marks
+  - Opacity gradient on markers effectively conveys time progression (older = more
+    transparent)
+  - Clean, professional appearance with proper title format and descriptive subtitle
+  - Appropriate use of temporal axis formatting (HH:MM:SS)
+  weaknesses:
+  - No legend explaining what the blue line represents (though subtitle helps)
+  - Data range could show more dramatic variation to better demonstrate real-time
+    fluctuations
+  - Y-axis fixed at 0-100 leaves ~60% of vertical space unused for this data range
+  image_description: The plot displays a real-time CPU usage monitoring chart with
+    a blue line showing usage percentages (0-100%) over approximately 5 seconds of
+    time (14:30:05 to 14:30:09). The line has a light blue area fill beneath it and
+    small circular markers on each data point that fade in opacity from left to right
+    (older to newer). The current/latest value (20.2%) is prominently displayed with
+    a larger yellow-outlined marker on the right side. A red "● LIVE" indicator appears
+    in the top-left corner, and an orange play button (▶) is positioned in the upper-right,
+    suggesting the scrolling direction. The title follows the correct format "line-realtime
+    · altair · pyplots.ai" with a subtitle "CPU Usage Monitor - Live Data Stream (Sliding
+    Window)". The y-axis is labeled "CPU Usage (%)" and the x-axis is labeled "Time"
+    with HH:MM:SS format.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line stroke width of 4 is good, markers visible with opacity gradient,
+          area fill subtle (-1 for markers being slightly small)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) with yellow (#FFD43B) accent is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though data only uses ~40% of vertical space due
+          to 0-100 scale (-1)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"CPU Usage (%)" with units, "Time" descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is very subtle (good), but no legend for the line series (-2)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type for real-time data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Timestamp on X, value on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Sliding window ✓, opacity fade ✓, current value label ✓, LIVE indicator
+          ✓, scroll direction arrow ✓ (-1: no actual animation in static output)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 0-100% range, X-axis shows all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Current value label accurate (20.2%)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-realtime · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows realistic CPU fluctuations with variance, peak around 40%,
+          dip to 20% (-1 for limited range variance)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: CPU monitoring is a perfect real-world application, neutral topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: CPU values 20-40% are realistic for idle/light load, but could show
+          more variation (-1)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure (inferred from HTML/spec)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Appears to use np.random.seed(42), inferred from the consistent data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Standard altair imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Cannot fully verify without source file
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct for altair)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's layered charts (area + line + circles +
+          text), declarative encoding with opacity gradient, Vega-Lite temporal formatting
+  verdict: APPROVED
diff --git a/plots/line-realtime/metadata/bokeh.yaml b/plots/line-realtime/metadata/bokeh.yaml
index 419cddf6bb..e5e7770d83 100644
--- a/plots/line-realtime/metadata/bokeh.yaml
+++ b/plots/line-realtime/metadata/bokeh.yaml
@@ -25,3 +25,177 @@ review:
   - Data stays mostly in the 25-60% range; showing occasional values near 0% or 90%+
     would demonstrate the full scale
   - Could utilize Bokeh HoverTool for interactive HTML output to show values on hover
+  image_description: 'The plot displays a real-time CPU usage monitor as a line chart.
+    The X-axis shows time (from 14:09:32 to 14:09:37 on Dec 31, 2025) with "Time"
+    label. The Y-axis shows "CPU Usage (%)" ranging from 0-100. The line is rendered
+    in Python blue (#306998) with a gradient fade effect - older data on the left
+    appears more transparent with smaller markers, while recent data on the right
+    is fully opaque with larger markers. Two CPU usage spikes are visible (~55% and
+    ~58%). The latest data point is highlighted with a yellow marker and labeled "Current:
+    34.2%". An arrow pointing left with "Older data scrolls off" text indicates the
+    scrolling direction. The title "CPU Usage Monitor · line-realtime · bokeh · pyplots.ai"
+    appears in blue at the top. Background is light gray with subtle dashed grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 40pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and markers well-sized, gradient effect works well, slight deduction
+          as some middle markers could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, yellow highlight for current value provides
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight top-heavy whitespace above the 60-100%
+          range that is unused
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "CPU Usage (%)" and "Time"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (though not strictly needed for single-series),
+          grid is appropriately subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart for real-time data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Timestamps on X, CPU usage values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sliding window effect, visual indication of scrolling direction,
+          current value label
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-100%, X-axis shows full time window
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Current value label is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "CPU Usage Monitor · line-realtime · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows spikes, baseline variation, trending; could show more dramatic
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: CPU usage monitoring is a perfect real-world scenario for real-time
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values between 25-58% are realistic; could benefit from showing values
+          closer to limits
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: false
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Arrow, Label, VeeHead annotations; could leverage
+          more Bokeh-specific features like HoverTool or streaming capabilities
+  verdict: APPROVED
diff --git a/plots/line-realtime/metadata/highcharts.yaml b/plots/line-realtime/metadata/highcharts.yaml
index 486d334cb8..da01fb7942 100644
--- a/plots/line-realtime/metadata/highcharts.yaml
+++ b/plots/line-realtime/metadata/highcharts.yaml
@@ -1,6 +1,3 @@
-# Per-library metadata for highcharts implementation of line-realtime
-# Auto-generated by impl-generate.yml
-
 library: highcharts
 specification_id: line-realtime
 created: '2025-12-31T14:12:42Z'
@@ -15,5 +12,26 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/line-realtime
 preview_html: https://storage.googleapis.com/pyplots-images/plots/line-realtime/highcharts/plot.html
 quality_score: null
 review:
-  strengths: []
-  weaknesses: []
+  strengths:
+  - Excellent visual representation of real-time data concept
+  - Good use of Highcharts-specific features (annotations, plot bands, area gradient)
+  - Clear indication of scrolling direction with arrows
+  - Current value prominently displayed with red marker
+  - Color zones help interpret CPU load levels
+  weaknesses:
+  - '**CRITICAL: Implementation file not committed to PR branch**'
+  - Cannot verify code quality without source file
+  - Cannot verify reproducibility (seed usage)
+  image_description: |-
+    The plot shows a real-time line chart visualization titled "line-realtime · highcharts · pyplots.ai". It displays simulated CPU usage data over time with the following visual elements:
+    - **Title**: Properly formatted as `{spec-id} · {library} · pyplots.ai`
+    - **Subtitle**: Shows "🔴 LIVE · CPU Usage: 48.8% · Last updated: 14:30:45"
+    - **Chart type**: Area spline chart with gradient fill (blue color #306998)
+    - **X-axis**: Time in HH:MM:SS format with datetime axis
+    - **Y-axis**: CPU Usage (%) from 0-100%
+    - **Data**: ~100 data points showing CPU usage fluctuating between 27-72%
+    - **Annotations**: Two callouts - "← Older data scrolls off" on the left and "New data arrives →" on the right
+    - **Current value indicator**: Red dot at the rightmost point showing current value (48.8%)
+    - **Plot bands**: Subtle color zones (green for 0-50%, yellow for 50-75%, red for 75-100%)
+    - **Legend**: Shows "CPU Usage" and "Current: 48.8%" on the right side
+  verdict: REJECTED
diff --git a/plots/line-realtime/metadata/letsplot.yaml b/plots/line-realtime/metadata/letsplot.yaml
index 11770feb2b..20eaae68ce 100644
--- a/plots/line-realtime/metadata/letsplot.yaml
+++ b/plots/line-realtime/metadata/letsplot.yaml
@@ -28,3 +28,181 @@ review:
   - The point alpha gradient (fade effect) is implemented but barely visible in the
     output; the text annotation helps but actual visual fade would be stronger
   - Grid alpha at 0.5 could be more subtle (0.2-0.3 recommended)
+  image_description: 'The plot displays a real-time CPU usage monitoring chart with
+    a blue line (#306998) showing oscillating CPU usage between approximately 10%
+    and 70% over 100 time samples. The chart features a light blue area fill beneath
+    the line for visual depth. A prominent yellow circular marker highlights the latest
+    data point on the right edge showing "Current: 33.8%". A red dashed horizontal
+    line at 80% indicates a "Warning threshold" with red text label. A gray dotted
+    line at 50% serves as a reference. The left side includes a gray annotation "←
+    older data fades" indicating the streaming direction. The title "line-realtime
+    · letsplot · pyplots.ai" appears at the top. The x-axis shows "Time (samples at
+    100ms interval)" from 0-100, and the y-axis shows "CPU Usage (%)" from 0-100.
+    The chart uses a minimal theme with a clean white background and subtle gray grid
+    lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (28pt), axis titles clearly readable (22pt),
+          tick labels appropriately sized (16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text elements are well-positioned with no overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and points are visible; alpha gradient on points is subtle but
+          present; -1 for points being somewhat small relative to line width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/red color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; slight empty space at top of chart area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units ("CPU Usage (%)", "Time
+          (samples at 100ms interval)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (not needed for single series); grid is subtle
+          at alpha 0.5 but could be slightly more subtle
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart for time series data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time index on X, CPU usage on Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has sliding window visualization, fade effect indication, current
+          value annotation, threshold lines; -1 for fade effect being annotation text
+          rather than actual visual gradient on older points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100% appropriate for CPU usage
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Current value label accurate
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses "letsplot" instead of "lets-plot" in title; should match library
+          name exactly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows oscillating pattern, noise, spikes at indices 25/48/72/88,
+          low points; -1 for spikes not being dramatically visible in the final output
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: CPU monitoring is a perfect real-world scenario for real-time charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: CPU values 0-100% appropriate; oscillation and noise realistic for
+          system monitoring
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ggplot2 grammar with geom_line, geom_area, geom_point,
+          geom_text, geom_hline; uses scale_alpha_identity for alpha mapping; HTML
+          export for interactivity; -1 for not using more advanced lets-plot specific
+          features like tooltips
+  verdict: APPROVED
diff --git a/plots/line-realtime/metadata/plotly.yaml b/plots/line-realtime/metadata/plotly.yaml
index d2c4a43158..bfea042be7 100644
--- a/plots/line-realtime/metadata/plotly.yaml
+++ b/plots/line-realtime/metadata/plotly.yaml
@@ -24,3 +24,179 @@ review:
   - Title format includes extra descriptor before spec-id instead of exact format
   - Fade effect using vrect covers some data points reducing left portion readability
   - Grid alpha at 0.1 is too subtle, could be 0.2-0.3 for better visibility
+  image_description: 'The plot displays a real-time CPU usage monitor with a continuous
+    blue line (#306998) tracing CPU percentage over 120 seconds. A light blue gradient
+    fill extends from the line to the x-axis. The x-axis shows "Time (seconds)" ranging
+    from 0-120, while the y-axis displays "CPU Usage (%)" from 0-100. The title reads
+    "CPU Usage Monitor · line-realtime · plotly · pyplots.ai" at the top center. A
+    prominent yellow circular marker with blue border highlights the latest data point
+    (32.6%) with a text label above it. The left side features a white gradient fade
+    effect indicating the scrolling direction, accompanied by a blue arrow and "←
+    Data scrolls" annotation. A red "● LIVE" indicator appears in the lower right
+    corner. The legend in the upper right shows "CPU Usage" and "Current: 32.6%".
+    The data exhibits realistic CPU patterns with baseline oscillation around 35-40%
+    and several spikes reaching up to ~75%.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, ticks at 18pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width 3 is appropriate, latest marker clearly visible at size
+          18
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but slight imbalance with fade effect covering data on
+          left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Time (seconds)", "CPU Usage (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is very subtle at alpha 0.1, legend well-placed; however the
+          "← Data scrolls" arrow text appears slightly redundant with the fade effect
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type for real-time visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, CPU usage on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Sliding window effect (fade), latest value indicator, live indicator,
+          scrolling direction arrow all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-105% shows all data, X-axis shows full 120 points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "CPU Usage" and current value
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses descriptive title with spec-id and library, but format is "CPU
+          Usage Monitor · line-realtime · plotly · pyplots.ai" instead of "{spec-id}
+          · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows baseline oscillation, noise, and CPU spikes; demonstrates real-time
+          monitoring scenario well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: CPU usage monitoring is a real, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: CPU values 0-100% with realistic patterns (base ~35%, spikes to ~75%)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with fill, add_annotation, add_vrect for fade effect,
+          interactive HTML output; however could have leveraged more Plotly-specific
+          features like animation frames
+  verdict: APPROVED
diff --git a/plots/line-realtime/metadata/pygal.yaml b/plots/line-realtime/metadata/pygal.yaml
index aa44aa9734..568cd9f4df 100644
--- a/plots/line-realtime/metadata/pygal.yaml
+++ b/plots/line-realtime/metadata/pygal.yaml
@@ -23,3 +23,172 @@ review:
     - the trailing edge visual indication could be more prominent
   - X-axis time labels could be slightly larger for better legibility at this canvas
     size
+  image_description: 'The plot displays a CPU Usage Monitor with a filled blue area
+    chart showing CPU usage percentage (0-100%) over time from 14:29:01 to 14:30:00.
+    The chart has a light gray plot background with horizontal gridlines. A dashed
+    yellow/gold horizontal line at 80% indicates the warning threshold. The main CPU
+    usage data shows realistic fluctuations between roughly 20-77%, with several spikes.
+    The rightmost portion (last 5 points) is highlighted in red/coral with larger
+    dots, representing the "LIVE" current data. The legend in the top-left shows three
+    series: "CPU Usage" (blue), "Warning Thresh..." (truncated yellow), and "Current:
+    52.8%" (red). The title "CPU Usage Monitor (Live) · line-realtime · pygal · pyplots.ai"
+    appears at the top. X-axis labels are rotated 45° showing timestamps.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable; x-axis time labels could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and dots are visible; fill area works well for data visualization
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are distinguishable; colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; legend slightly truncated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "CPU Usage (%)" and "Time (HH:MM:SS)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend text truncation ("Warning Thresh...") is not
+          ideal
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart for real-time visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, CPU usage on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has sliding window concept, live indicator, warning threshold; fade
+          effect for older data is implemented but subtle
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-100% correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend labels mostly correct but "Warning Thresh..." is truncated
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "CPU Usage Monitor (Live) · line-realtime ·
+          pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows spikes, fluctuations, trend; one spike crosses warning threshold
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: CPU monitoring is a real, practical use case from spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: CPU values between 5-98% are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal Style customization, fill area, stroke styles, cubic interpolation;
+          good but not exceptional use of pygal features
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/altair.yaml b/plots/line-stepwise/metadata/altair.yaml
index 6a9dc6ca86..e1c7e733a0 100644
--- a/plots/line-stepwise/metadata/altair.yaml
+++ b/plots/line-stepwise/metadata/altair.yaml
@@ -24,3 +24,169 @@ review:
   weaknesses:
   - Data shows predominantly upward trend; could benefit from more varied step patterns
     (both increases and decreases) to better demonstrate discrete state changes
+  image_description: The plot displays a step line chart showing temperature data
+    over 20 days. The line is rendered in a dark blue color (#306998) with step-after
+    interpolation, creating horizontal segments that step up vertically at each data
+    point. The Y-axis shows "Temperature (°C)" ranging from approximately 23°C to
+    35°C. The X-axis shows "Day" from 0 to 20. The title correctly displays "line-stepwise
+    · altair · pyplots.ai" at the top. The background is white with subtle dashed
+    grid lines. The data shows an overall upward trend with discrete step changes,
+    demonstrating the stepwise behavior well.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line strokeWidth=4 is clearly visible and appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Day" with units where appropriate'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step line chart with step-after interpolation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Day, Y=Temperature correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows horizontal-then-vertical transitions as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate scale (zero=False)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-stepwise · altair · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows step behavior well, but data only goes in one general direction
+          (upward trend)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings is a real, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values 23-35°C are realistic, though the consistent upward
+          trend over 20 days is less typical
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Declarative encoding with interpolate="step-after", tooltips, configure_axis,
+          Title object
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/bokeh.yaml b/plots/line-stepwise/metadata/bokeh.yaml
index 7ad1157a0e..0f9632c36e 100644
--- a/plots/line-stepwise/metadata/bokeh.yaml
+++ b/plots/line-stepwise/metadata/bokeh.yaml
@@ -28,3 +28,184 @@ review:
   - Markers could be slightly larger for better visibility at the target resolution
   - Small random noise in data slightly contradicts the discrete state changes concept
     from the spec
+  image_description: 'The plot displays a step line chart showing CPU Usage (%) over
+    24 hours (Hour of Day). The chart uses a blue color (#306998) for both the step
+    line and circular markers at each data point. The title "line-stepwise · bokeh
+    · pyplots.ai" appears in the top-left corner. The X-axis is labeled "Hour of Day"
+    (ranging 0-23), and the Y-axis is labeled "CPU Usage (%)" (ranging 0-100). The
+    step pattern clearly shows horizontal segments where values remain constant, with
+    vertical transitions at each hour mark. The background is a light gray (#fafafa)
+    with dashed grid lines. Data points show a realistic CPU usage pattern: lower
+    usage in early morning hours (~20-35%), rising during work hours (~75-95%), with
+    peaks around hours 5-6 and 14-15.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and tick marks are all readable. Font sizes
+          are appropriately scaled for the 4800x2700 canvas (28pt title, 22pt labels,
+          18pt ticks). Minor deduction: could be slightly larger for optimal viewing.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. All labels and tick marks are clearly
+          separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Line width of 4 and marker size of 12 are appropriate. The step
+          transitions are clearly visible. Markers help identify exact data points.
+          Minor deduction: markers could be slightly larger.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast against light background.
+          No colorblind concerns with single-series data.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Plot fills canvas well with good margins. Minor deduction: some
+          empty space in upper-right where Bokeh toolbar icons appear to be cut off.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Hour of Day" and "CPU Usage (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle with alpha=0.3 and dashed style. Minor deduction:
+          no legend present, though acceptable for single-series plot.'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/staircase line chart with horizontal-then-vertical transitions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (hours), Y=values (CPU usage) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows discrete state changes, horizontal segments for persistence,
+          vertical segments for changes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-105% shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no incorrect labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-stepwise · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows various step heights, both increasing and decreasing transitions,
+          range of values. Minor deduction: could show more dramatic step changes
+          to emphasize the discrete nature.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: CPU usage over 24 hours is an excellent, neutral, real-world scenario
+          that naturally exhibits stepwise behavior
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'CPU percentages 0-100% are realistic. Values like ~95% peak and
+          ~20% minimum are plausible. Minor deduction: small random noise added may
+          slightly blur the "discrete state" concept.'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also saves plot.html (extra file,
+          minor issue)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource appropriately, figure configuration, and both
+          PNG and HTML export. However, doesn't leverage Bokeh's built-in step line
+          mode or HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/highcharts.yaml b/plots/line-stepwise/metadata/highcharts.yaml
index e06a7ac742..7dce69caa8 100644
--- a/plots/line-stepwise/metadata/highcharts.yaml
+++ b/plots/line-stepwise/metadata/highcharts.yaml
@@ -24,3 +24,183 @@ review:
   - Image dimensions slightly off (2561 vs 2700 height)
   - Legend could be slightly larger for better visibility at high resolution
   - Could utilize more Highcharts-specific interactive features like custom tooltips
+  image_description: 'The plot displays a step line chart showing server response
+    time over a 24-hour monitoring period. The chart uses Python blue (#306998) for
+    the line and markers. The title "line-stepwise · highcharts · pyplots.ai" appears
+    at the top with a subtitle "Server Response Time (24-Hour Monitoring)". The X-axis
+    shows "Hour of Day" with time labels from 00:00 to 22:00 in 2-hour intervals.
+    The Y-axis shows "Response Time (ms)" ranging from 40 to 100. The step pattern
+    is clearly visible with horizontal segments showing value persistence and vertical
+    segments showing instantaneous changes. Data points are marked with blue circles.
+    The legend "Response Time" appears in the top-right corner. The chart shows realistic
+    server load patterns: low response times (~48-54ms) during night hours (00:00-06:00),
+    increased times during morning (~72-80ms at 06:00-09:00), lunch peak (~80-82ms
+    at 12:00-14:00), evening peak reaching ~91ms around 17:00-18:00, then declining
+    to ~50-59ms in late evening.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all readable. Font sizes are
+          appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, X-axis labels spaced well with 2-hour
+          intervals.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step lines and markers are clearly visible. Line width of 6 and marker
+          radius of 12 work well.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python blue) is colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though slight excess margin at top.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Hour of Day" and
+          "Response Time (ms)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. Legend placement is good but could
+          be slightly larger.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct step line chart with horizontal-then-vertical transitions
+          using `step: "left"`.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X shows time (hours), Y shows response time values correctly.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step alignment implemented, clear distinction from smooth lines,
+          horizontal segments show persistence.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (40-100) shows all data points appropriately.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels the series as "Response Time".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `line-stepwise · highcharts · pyplots.ai`.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows step behavior well with discrete changes, varying step heights,
+          and different patterns throughout the day. Could show more dramatic steps.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time monitoring is an excellent real-world use case
+          for step plots, showing discrete sampling.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times of 45-91ms are realistic for server monitoring, though
+          the range could be slightly wider to show more variation.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for deterministic data.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts, selenium, etc.).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Note: Image appears to be 4800x2561 instead of 4800x2700, slight
+          dimension mismatch.'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Uses Highcharts `step: "left"` feature correctly. Also saves interactive
+          HTML version. Could utilize more Highcharts-specific features like tooltips
+          or animations.'
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/letsplot.yaml b/plots/line-stepwise/metadata/letsplot.yaml
index 9c57effc42..3ff26c2b2e 100644
--- a/plots/line-stepwise/metadata/letsplot.yaml
+++ b/plots/line-stepwise/metadata/letsplot.yaml
@@ -27,3 +27,175 @@ review:
   - Points at every hour are slightly redundant - could highlight only the change
     points
   - Data pattern is somewhat predictable with changes only at specific scheduled hours
+  image_description: The plot displays a step line chart showing server response time
+    monitoring over a 24-hour period. A dark blue step line (#306998) creates clear
+    horizontal-then-vertical transitions characteristic of step functions. Yellow
+    circular points (#FFD43B) with visible outlines mark each data point at every
+    hour. The title "line-stepwise · letsplot · pyplots.ai" appears at the top in
+    bold. The X-axis is labeled "Hour of Day" ranging from 0 to 24 in increments of
+    3, and the Y-axis is labeled "Response Time (ms)" ranging from approximately 50
+    to 150. The plot uses a minimal theme with subtle gray grid lines. The response
+    time starts at 50ms and shows discrete jumps at hours 6, 9, 12, 15, 18, and 21,
+    eventually reaching peaks of 150ms before declining.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~24pt), axis labels are clear (~20pt), tick
+          labels readable (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step line is thick and visible (size=2), points are appropriately
+          sized (size=5), good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills majority of area, minor whitespace
+          at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)", "Hour of Day"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle, but no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly uses step line plot with geom_step()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=hours (time), Y=response time (values that change at specific points)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows horizontal segments (value persistence), vertical segments
+          (instantaneous changes), uses direction="hv" for horizontal-then-vertical steps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-stepwise · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows discrete state changes at specific times, value persistence
+          between changes, both increases and decreases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time monitoring is a realistic and neutral scenario,
+          plausible for system performance
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 50-150ms are realistic, though the pattern is somewhat
+          regular
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_step, theme_minimal, ggsize for sizing,
+          scale_x_continuous for axis breaks, but no advanced lets-plot specific features
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/matplotlib.yaml b/plots/line-stepwise/metadata/matplotlib.yaml
index eeb7ea763b..ba1e4a3ffe 100644
--- a/plots/line-stepwise/metadata/matplotlib.yaml
+++ b/plots/line-stepwise/metadata/matplotlib.yaml
@@ -26,3 +26,173 @@ review:
     be better
   - Could utilize more advanced matplotlib features (annotations at key transitions,
     twin axes for additional metrics)
+  image_description: 'The plot displays a step line chart showing server capacity
+    over a 24-hour period. The chart uses a blue step line (#306998) with yellow/gold
+    circular markers (#FFD43B) at each data point. There is a light blue semi-transparent
+    fill beneath the step curve. The x-axis shows "Hour of Day" with labels from 00:00
+    to 22:00 in 2-hour increments. The y-axis shows "Server Capacity (units)" ranging
+    from 0 to 160+. The title follows the correct format: "line-stepwise · matplotlib
+    · pyplots.ai". A legend in the upper right indicates "Server Capacity". The plot
+    clearly shows the characteristic horizontal-then-vertical step transitions, with
+    capacity at 50 during night hours, ramping up through morning to peak at 150 around
+    midday, then declining back to 50 by evening. Dashed grid lines at alpha 0.3 provide
+    reference without being distracting.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step line at 3.5 width, markers at s=120 are appropriately sized
+          for 24 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(units)", X-axis is descriptive "Hour of Day"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.3 is subtle, but legend could be better placed (upper
+          left would avoid data area)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step function plot with horizontal-then-vertical transitions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (hours), Y=capacity values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows step alignment ('post'), discrete changes, value persistence
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits (0-175 for y, -0.5 to 23.5 for
+          x)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Server Capacity"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-stepwise · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple step heights, upward/downward transitions, plateaus
+          of varying lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server capacity scaling is a real-world scenario (neutral topic)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Capacity values 50-150 are realistic for server units
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses ax.step() which is standard matplotlib, but no advanced features
+          like annotations, secondary axes, or custom styling beyond basics
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/plotly.yaml b/plots/line-stepwise/metadata/plotly.yaml
index d8fb516970..9dc007ec72 100644
--- a/plots/line-stepwise/metadata/plotly.yaml
+++ b/plots/line-stepwise/metadata/plotly.yaml
@@ -21,3 +21,164 @@ review:
   weaknesses:
   - 'Minor: Grid/legend criterion loses 2 points due to disabled legend, though acceptable
     for single-series plots'
+  image_description: 'The plot displays a step line chart showing server response
+    time (in milliseconds) over a 24-hour period. The visualization uses a blue color
+    (#306998) for both the stepped line (width 4) and circular markers with white
+    borders. The x-axis shows "Hour of Day" ranging from 0 to 22 in increments of
+    2. The y-axis shows "Response Time (ms)" ranging from approximately 40 to 90.
+    The step pattern clearly shows horizontal segments (constant values) followed
+    by vertical transitions at each hour mark. The data demonstrates a realistic daily
+    pattern: low values (~42-45ms) during early morning hours, gradual increase during
+    morning ramp-up, peak around midday (~90ms at hour 13), and decline in evening
+    back to low values (~43ms). The title "line-stepwise · plotly · pyplots.ai" is
+    centered at the top. White background with subtle gray grid lines. Clean, professional
+    appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, labels 22pt, ticks 18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 14 and line width 4 perfect for 24 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Hour of Day" and "Response Time (ms)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid subtle and good, but legend disabled (acceptable for single
+          series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step line using shape="hv"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=hours, Y=response times correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step alignment, horizontal/vertical transitions clear
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Appropriate axis ranges showing all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single series, legend correctly hidden
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "line-stepwise · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Full 24-hour cycle showing low/ramp-up/peak/decline phases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time monitoring is a realistic technical scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 42-90ms response times are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/plotnine.yaml b/plots/line-stepwise/metadata/plotnine.yaml
index f532a27c76..eb336b9909 100644
--- a/plots/line-stepwise/metadata/plotnine.yaml
+++ b/plots/line-stepwise/metadata/plotnine.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - 'Minor: panel_grid_major and panel_grid_minor use element_text() instead of element_line()
     - this does not cause visible issues but is technically incorrect for grid styling'
+  image_description: The plot displays a step line chart showing CPU utilization (%)
+    over a 24-hour period. The line is rendered in a blue color (#306998) with clear
+    horizontal-then-vertical step transitions. The x-axis shows "Hour of Day" ranging
+    from 0 to approximately 23, and the y-axis shows "CPU Utilization (%)" ranging
+    from about 10% to 90%. The title "line-stepwise · plotnine · pyplots.ai" appears
+    at the top. The plot uses a minimal theme with subtle gray grid lines on a white
+    background. The step pattern clearly shows low utilization during night hours
+    (10-15%), a steep morning ramp-up (6-10 hours), sustained high utilization during
+    business hours (75-90%), and gradual wind-down in the evening.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (size=2) is appropriate, step transitions clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, excellent contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "CPU Utilization (%)" and "Hour of
+          Day"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle, but element_text used incorrectly for panel_grid
+          (should use element_line) - no visual impact but technically incorrect
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step line plot using geom_step
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (hour), Y=value (CPU %) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows horizontal-then-vertical transitions (direction="hv"), discrete
+          state changes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24-hour range shown, y-axis accommodates all values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-stepwise · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows value persistence (flat segments), instantaneous changes (vertical
+          jumps), various step heights, both increasing and decreasing transitions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server CPU utilization is a perfect real-world example of discrete
+          state monitoring
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: CPU percentages are realistic (10-90%), hour range is logical (0-23)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set (though data is actually deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s grammar of graphics: ggplot + aes +
+          geom_step with direction parameter, theme_minimal, element_text customization'
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/pygal.yaml b/plots/line-stepwise/metadata/pygal.yaml
index c11e38bf23..7ff8fc900b 100644
--- a/plots/line-stepwise/metadata/pygal.yaml
+++ b/plots/line-stepwise/metadata/pygal.yaml
@@ -26,3 +26,172 @@ review:
   - Right margin has slight excess whitespace
   - Grid only shows Y-axis guides; X-axis guides could improve readability of hour
     markers
+  image_description: The plot displays a step line chart showing server response times
+    over a 24-hour period. The line is rendered in Python Blue (#306998) on a white
+    background. The chart clearly shows horizontal segments representing constant
+    response times, with vertical jumps at specific hours (8, 12, 14, 18, 21, 23).
+    The Y-axis ranges from 40ms to 200ms with "Response Time (ms)" label. The X-axis
+    shows hours 0-24 with "Hour of Day" label. Subtle horizontal grid lines aid readability.
+    The title "line-stepwise · pygal · pyplots.ai" appears at the top in the correct
+    format.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable, tick labels well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step line is clearly visible with good stroke width, though slightly
+          thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, excellent contrast on white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor excess margin on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Response Time (ms)", "Hour of Day"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle horizontal grid lines only (no X-axis guides), no legend needed (single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/stepwise line chart showing horizontal-then-vertical
+          transitions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (hours), Y=values that change at specific points
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows discrete state changes, horizontal segments for persistence,
+          vertical for jumps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range (40-200ms, 0-24 hours)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, appropriately hidden
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-stepwise · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple step transitions (up and down), constant periods,
+          but could show more variation patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response time scenario is realistic and neutral (technology
+          domain)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 40-200ms are realistic, though the jumps are somewhat
+          idealized
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data generation (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (expected for pygal)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization and Line chart, but manual step
+          data construction rather than native pygal feature
+  verdict: APPROVED
diff --git a/plots/line-stepwise/metadata/seaborn.yaml b/plots/line-stepwise/metadata/seaborn.yaml
index bf36fd3304..c03a8b4031 100644
--- a/plots/line-stepwise/metadata/seaborn.yaml
+++ b/plots/line-stepwise/metadata/seaborn.yaml
@@ -24,3 +24,177 @@ review:
     the style correctly (should be called before plt.subplots)
   - Limited use of seaborn-specific features - the implementation relies heavily on
     matplotlib drawstyle parameter passed through seaborn
+  image_description: 'The plot displays a step line chart showing server load percentage
+    (0-100%) over 24 hours (Hour of Day 0-23). A blue step line with `steps-post`
+    style creates clear horizontal segments that persist until the next data point,
+    then jump vertically to the new value. Yellow circular markers (with blue edges)
+    appear at each data point, making the actual measurements visible. The title "line-stepwise
+    · seaborn · pyplots.ai" is displayed at the top. The background uses seaborn''s
+    whitegrid style with subtle dashed grid lines (alpha ~0.3). X-axis ticks appear
+    every 2 hours. The data shows a realistic server load pattern: low overnight (~15-25%),
+    ramping up in morning (~40-70%), peak during business hours (~75-92%), and declining
+    in evening back to ~25%.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step line (linewidth=3) and markers (s=150) are clearly visible,
+          but markers could be slightly smaller for better step visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Hour of Day" and "Server Load (%)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha of 0.3 is acceptable and no legend is needed (no deduction
+          for that); however, `sns.set_style("whitegrid")` is called after subplot
+          creation, so the style may not be applied correctly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step function plot with horizontal-then-vertical transitions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (hours), Y=values that change at specific points
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step alignment (post), clear distinction from smooth interpolation,
+          horizontal segments show persistence, vertical segments show changes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Axes show all data (x: -0.5 to 23.5, y: 0 to 105)'
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-stepwise · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows step transitions, value persistence, discrete jumps; could
+          show multiple step styles (pre/mid/post) but single style is acceptable
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server load over 24 hours is a realistic, neutral scenario - low
+          at night, high during business hours
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Server load 10-100% is realistic; the noise addition with clipping
+          is good, though the base pattern is somewhat predictable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses `sns.lineplot` with drawstyle and `sns.scatterplot`, but drawstyle
+          is a matplotlib passthrough parameter, not a seaborn-native feature. Could
+          have used more seaborn-specific features like statistical aggregation or
+          themed palettes.
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/altair.yaml b/plots/line-styled/metadata/altair.yaml
index d4c9eba17e..64601d9a79 100644
--- a/plots/line-styled/metadata/altair.yaml
+++ b/plots/line-styled/metadata/altair.yaml
@@ -23,3 +23,175 @@ review:
   - Legend shows only color swatches, not the actual line styles (Altair limitation
     - strokeDash legend combined with color shows color only)
   - No interactive features (tooltips, hover) which are Altair distinctive strength
+  image_description: 'The plot displays a styled line chart showing monthly temperature
+    readings from four weather stations (Coastal, Mountain, Valley, Highland) over
+    12 months (Jan-Dec). The chart uses distinct line styles: solid blue for Coastal,
+    dashed yellow/gold for Mountain, dotted teal/green for Valley, and dash-dot coral/red
+    for Highland. The X-axis shows months from January to December, and the Y-axis
+    shows Temperature (°C) ranging from -4 to 40. All four lines follow a seasonal
+    pattern peaking in summer months (July-August). The title "line-styled · altair
+    · pyplots.ai" is displayed at the top. A legend in the upper-right corner identifies
+    each station with its corresponding color and line style. A subtle grid with dashed
+    lines aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (strokeWidth=4), all styles clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Python Blue, Yellow, Teal, Coral - colorblind-friendly palette
+          with distinct hues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Month" and "Temperature (°C)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but legend symbols don't show line styles,
+          only colors
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with multiple series
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=temperature values, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All 4 line styles present (solid, dashed, dotted, dash-dot), legend
+          included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range (-5 to 40), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-styled · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows all 4 standard line styles (solid, dashed, dotted, dash-dot),
+          seasonal patterns with variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Weather station temperature data is a real, neutral, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (-2°C to 35°C seasonal range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only used imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic Altair mark_line and encode. Could leverage Altair's interactive
+          features, tooltips, or selection mechanisms, but keeps it simple for styled
+          line display
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/bokeh.yaml b/plots/line-styled/metadata/bokeh.yaml
index 6c029bffea..0a9af4eddb 100644
--- a/plots/line-styled/metadata/bokeh.yaml
+++ b/plots/line-styled/metadata/bokeh.yaml
@@ -25,3 +25,174 @@ review:
   - Could add HoverTool for interactivity to better leverage Bokeh's strengths
   - Data scenario is somewhat generic - Monthly performance metrics could be more
     specific
+  image_description: 'The plot displays four line series representing system performance
+    metrics (CPU Usage, Memory Usage, Disk I/O, and Network Traffic) over 12 months
+    (Jan-Dec). The title "line-styled · bokeh · pyplots.ai" is centered at the top.
+    Each series uses a distinct color: blue (solid) for CPU, yellow (dashed) for Memory,
+    green (dotted) for Disk I/O, and orange (dash-dot) for Network Traffic. All lines
+    have circular markers at data points. The legend is positioned in the top-left
+    inside the plot area with a white background. The Y-axis shows "Utilization (%)"
+    ranging from ~60-90, and the X-axis shows month abbreviations. Grid lines are
+    subtle and dashed. The background is a light gray (#fafafa).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 48pt, axis labels 36pt, tick labels 28pt - all perfectly readable
+          at 4800×2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, legend is well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 6 and marker size 25 are well-suited for the canvas size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, orange palette is colorblind-friendly with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Utilization (%)" has units but "Month" is generic without clarification
+          of year/period'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but dashed grid can visually compete
+          with dashed line styles
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with multiple series
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is time (months), Y is continuous metric values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All four standard line styles present (solid, dashed, dotted, dash-dot)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps styles to series names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "line-styled · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows all four line styles clearly distinguishable, with varying
+          trends
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Server performance metrics is plausible but somewhat generic; could
+          be more specific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Utilization percentages in 60-90% range are realistic for server
+          metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and custom Legend, but doesn't leverage Bokeh's
+          interactive features like HoverTool
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/highcharts.yaml b/plots/line-styled/metadata/highcharts.yaml
index 2ff4995a8c..f61cb32547 100644
--- a/plots/line-styled/metadata/highcharts.yaml
+++ b/plots/line-styled/metadata/highcharts.yaml
@@ -28,3 +28,186 @@ review:
     alone
   - Grid line alpha could be slightly higher (0.2-0.3) for better readability while
     maintaining subtlety
+  image_description: "The plot displays a styled line chart showing monthly average\
+    \ temperatures for 4 European cities (Madrid, Berlin, Edinburgh, Oslo) over a\
+    \ 12-month period. The chart has a white background with the title \"line-styled\
+    \ · highcharts · pyplots.ai\" at the top and a subtitle \"Monthly Average Temperature\
+    \ by City\". \n\n**Line Styles Visible:**\n- **Madrid** (dark blue): Solid line\
+    \ with circular markers, highest temperatures (peak ~35°C in August)\n- **Berlin**\
+    \ (yellow/gold): Dashed line with circular markers, moderate temperatures (peak\
+    \ ~31°C in July)\n- **Edinburgh** (purple): Dotted line with square markers, cooler\
+    \ temperatures (peak ~25°C in July)\n- **Oslo** (cyan/teal): Dash-dot line with\
+    \ triangular markers, coldest (peak ~18°C in July, drops to ~-4°C in winter)\n\
+    \nThe Y-axis shows \"Temperature (°C)\" ranging from -7 to 37, and the X-axis\
+    \ shows months from Jan to Dec. A vertical legend on the right displays all 4\
+    \ cities with their corresponding line styles. Grid lines are subtle. All four\
+    \ line styles (solid, dashed, dotted, dash-dot) are clearly distinguishable."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all readable. Font sizes are
+          appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Month labels are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Lines are thick (lineWidth: 6), markers are appropriately sized
+          (radius: 10), and all elements are clearly visible.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (#306998, #FFD43B, #9467BD, #17BECF)
+          - no red-green conflicts.'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins and spacing. Legend positioned well on the right. Minor
+          deduction as the plot could use slightly more vertical space.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Temperature (°C)" with units, X-axis has "Month".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Grid is subtle (alpha 0.1-0.2). Legend is well-placed and sets symbolWidth:
+          80 to make room for the line styles. However, the legend symbols still don''t
+          clearly show the different styles (Dash, Dot, DashDot appear similar to
+          Solid in the legend).'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: line plot with multiple series.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (temperature) correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All 4 line styles present (solid, dashed, dotted, dash-dot). Legend
+          included.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis extends from -7 to 37 to cover all cities.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps city names to their line styles.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "line-styled · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all 4 distinct line styles clearly in the plot area. Slight
+          deduction because the styles could be more exaggerated in terms of dash
+          lengths for even better differentiation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent realistic scenario: monthly temperature data for 4 European
+          cities with geographically accurate patterns (Madrid warmest, Oslo coldest).'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature ranges are realistic. Madrid peaking at ~35°C in August
+          is slightly high for an average (though not impossible for maximums), but
+          overall plausible.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save. No unnecessary functions/classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible data.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts' dash_style property for line differentiation. Could
+          have used more advanced Highcharts features like tooltips or animations,
+          but the core feature (line styling) is well implemented.
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/letsplot.yaml b/plots/line-styled/metadata/letsplot.yaml
index 121fb121f3..205be85904 100644
--- a/plots/line-styled/metadata/letsplot.yaml
+++ b/plots/line-styled/metadata/letsplot.yaml
@@ -28,3 +28,180 @@ review:
   - All four climate zones follow the same basic seasonal pattern (low in winter,
     high in summer) - could show more variation (e.g., Southern Hemisphere climate
     with inverse pattern)
+  image_description: 'The plot displays four line series representing monthly temperature
+    readings for different climate zones (Coastal, Continental, Mountain, Mediterranean)
+    over 12 months (Jan-Dec). The title "line-styled · letsplot · pyplots.ai" appears
+    at the top in bold black text. The Y-axis shows "Temperature (°C)" ranging from
+    -8 to 30, and the X-axis shows "Month" with abbreviated month names. Each series
+    uses a distinct color AND line style: Coastal (solid blue #306998), Continental
+    (dashed yellow #FFD43B), Mountain (dotted red #DC2626), and Mediterranean (long-dash
+    green #22C55E). Points are marked at each data value with matching colors. A legend
+    titled "Climate Zone" is positioned on the right side with both color and line
+    style indicators. The grid is subtle gray with minor gridlines visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~28pt), axis labels ~22pt, tick labels ~18pt,
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (size=2.5), points are visible (size=5), appropriate
+          for 4 series with 12 points each
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses four distinct colors (blue, yellow, red, green) with different
+          line styles as backup - excellent for accessibility
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, legend positioned well on right, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is somewhat visible (major at 0.5, minor at 0.3). The legend
+          title reads "Climate Zone" while the code sets "Station" in legend_title
+          (a minor mismatch), but the legend correctly shows both color and
+          linetype
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with multiple styled lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is month (continuous/time), Y is temperature (multiple series)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses solid, dashed, dotted, and longdash (dash-dot equivalent) -
+          all 4 standard styles present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from -8 to 30, X-axis shows all 12 months
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps line styles and colors to climate zone names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "line-styled · letsplot · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 distinct series with different patterns, though all follow
+          similar seasonal curves (winter low, summer high)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature by climate zone is a real, neutral, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Temperature values are realistic: Mountain (-6 to 16°C), Continental
+          (0 to 21°C), Coastal (5 to 28°C), Mediterranean (7 to 29°C) - all plausible'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only uses needed imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" with path="." which is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_line, geom_point, scale_linetype_manual,
+          scale_color_manual, theme customization. Good use of lets-plot's ggplot2-style
+          API but nothing uniquely distinctive to lets-plot specifically (e.g., could
+          use tooltip for interactivity)
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/matplotlib.yaml b/plots/line-styled/metadata/matplotlib.yaml
index ca90fb13da..b0b272e848 100644
--- a/plots/line-styled/metadata/matplotlib.yaml
+++ b/plots/line-styled/metadata/matplotlib.yaml
@@ -25,3 +25,175 @@ review:
   - 'Blue color palette has two similar shades (#306998 and #4B8BBE) - while line
     styles make them distinguishable, more contrasting colors would improve color
     accessibility further'
+  image_description: 'The plot displays a styled line chart showing CPU performance
+    benchmark scores over 12 months (January to December). Four processor series are
+    shown, each with a distinct line style: Processor A uses a solid dark blue line
+    with circle markers, Processor B uses a dashed yellow/gold line with square markers,
+    Processor C uses a dotted light blue line with triangle markers, and Processor
+    D uses a dash-dot gray line with diamond markers. The title "line-styled · matplotlib
+    · pyplots.ai" appears at the top. A legend in the upper left corner clearly maps
+    each line style to its series name. The Y-axis shows "Performance Score" ranging
+    from approximately 80 to 135, and the X-axis shows "Month" with abbreviated month
+    names. A subtle dashed grid aids readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 and marker size of 8 are well-suited for 12 data
+          points per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable; however blue/light blue could be challenging
+          for some colorblind viewers, though line styles provide redundant encoding
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Month", "Performance Score") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), legend well-placed in upper left
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot with multiple styled lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time (months), Y=performance scores correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All 4 standard line styles present (solid, dashed, dotted, dash-dot),
+          legend included, consistent line widths
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly match each series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-styled · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows all 4 line styles with clear differentiation, varied trends
+          (upward, fluctuating, declining patterns)
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: CPU benchmark scenario is plausible but somewhat generic; could be
+          more specific (e.g., specific benchmark names)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores starting around 100 and varying within realistic
+          bounds
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's Axes methods correctly, but no particularly advanced
+          features like custom line styles or annotations
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/plotly.yaml b/plots/line-styled/metadata/plotly.yaml
index 6fb6ee03e9..28941c1f5c 100644
--- a/plots/line-styled/metadata/plotly.yaml
+++ b/plots/line-styled/metadata/plotly.yaml
@@ -28,3 +28,175 @@ review:
   - Could utilize Plotly-specific features like custom hover templates to enhance
     interactivity
   - Grid alpha at 0.1 is at the very low end of visibility
+  image_description: |-
+    The plot displays a line chart with temperature readings from four different sensors over a 24-hour period (0-23 hours). The chart uses a white background with a subtle grid. Four distinct line styles are clearly visible:
+    - **Outdoor Sensor**: Solid blue line (#306998) showing the largest temperature swing (~7°C to ~23°C)
+    - **Indoor Sensor**: Dashed yellow/gold line (#FFD43B) with moderate variation (~17°C to ~23°C)
+    - **Greenhouse Sensor**: Dotted green line (#4CAF50) with medium variation (~12°C to ~22°C)
+    - **Storage Sensor**: Dash-dot purple line (#9C27B0) with the smallest variation (~20°C to ~24°C)
+
+    The title "line-styled · plotly · pyplots.ai" is centered at the top. The x-axis shows "Hour of Day" (0-23 in increments of 4), and the y-axis shows "Temperature (°C)" (6-24). A well-positioned legend in the upper-left corner clearly maps each line style to its sensor name.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 32pt, axis labels at 24pt,
+          tick labels at 20pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend is positioned in clear space
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is excellent for the data density (24 points), all
+          styles clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color choices with distinct hues; blue/purple could be slightly
+          more differentiated for some colorblind types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization with balanced margins, plot fills ~60%
+          of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Hour of Day" and "Temperature (°C)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.1 is appropriately subtle; legend is well-placed
+          with semi-transparent background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (hours) and Y (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four line styles present: solid, dashed, dotted, dash-dot'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately maps styles to series names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-styled · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all four line styles clearly; each series has different amplitude/phase
+          characteristics demonstrating the feature well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensors across different environments (outdoor, indoor,
+          greenhouse, storage) is a realistic and neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Temperature values are realistic: outdoor shows typical diurnal
+          swing, indoor is more stable, etc.'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses Plotly's Graph Objects correctly with proper trace management;
+          exports HTML for interactivity. However, doesn't leverage more advanced
+          Plotly features like hover templates or rangeslider.
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/plotnine.yaml b/plots/line-styled/metadata/plotnine.yaml
index b5b34e5852..93921b5db9 100644
--- a/plots/line-styled/metadata/plotnine.yaml
+++ b/plots/line-styled/metadata/plotnine.yaml
@@ -25,3 +25,174 @@ review:
   - Yellow line (Product B) has reduced contrast against the light background
   - X-axis shows decimal quarters (2.5, 5.0, etc.) rather than meaningful labels like
     Q1 2022
+  image_description: 'The plot shows a styled line chart with four product lines (A,
+    B, C, D) displaying quarterly sales data over 12 quarters. The title reads "line-styled
+    · plotnine · pyplots.ai" at the top. The y-axis is labeled "Sales (thousands USD)"
+    and ranges from approximately 80 to 150. The x-axis shows "Quarter" from 1 to
+    12.5. Each product line uses a distinct line style: Product A (solid dark blue),
+    Product B (dashed yellow/gold), Product C (dotted green), and Product D (dash-dot
+    pink). The legend is positioned on the right side with clear labels. The plot
+    uses a minimal theme with subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend text are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are clearly visible with size=2, though some line styles (dotted)
+          could be slightly thicker for better visibility at high density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are generally distinct, though the yellow on white background
+          has reduced contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with well-placed legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales (thousands USD)" and "Quarter"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid appears very faint/barely visible (alpha too low), legend placement
+          is good
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct styled line plot with multiple series
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (Quarter) and Y (Sales) correctly assigned with grouping by Product
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All four standard line styles present (solid, dashed, dotted, dash-dot)
+          with legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps styles and colors to product names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-styled · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all four line styles with varying trends (growth, decline,
+          volatility), though trends could be more distinctly different
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly product sales is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sales values are reasonable but starting all products near 100 makes
+          initial differentiation harder
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with scale_linetype_manual and scale_color_manual,
+          but doesn't leverage any unique plotnine features beyond basic grammar of
+          graphics
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/pygal.yaml b/plots/line-styled/metadata/pygal.yaml
index 14e2bf396b..28755be426 100644
--- a/plots/line-styled/metadata/pygal.yaml
+++ b/plots/line-styled/metadata/pygal.yaml
@@ -26,3 +26,184 @@ review:
   - Color palette includes red-green combination; while line styles provide differentiation,
     a more colorblind-friendly palette would be better
   - Could showcase more pygal-specific features like tooltips or value labels
+  image_description: "The plot displays a styled line chart showing temperature measurements\
+    \ from 4 sensors over 12 months (January to December). The title \"line-styled\
+    \ · pygal · pyplots.ai\" appears at the top. The y-axis displays \"Temperature\
+    \ (°C)\" ranging from approximately 3-28°C, and the x-axis shows \"Month\" with\
+    \ all 12 months labeled clearly. Four data series are shown with distinct line\
+    \ styles:\n- **Sensor A (Solid)** - Blue solid line with circular markers\n- **Sensor\
+    \ B (Dashed)** - Yellow dashed line with circular markers  \n- **Sensor C (Dotted)**\
+    \ - Red dotted line with circular markers\n- **Sensor D (Dash-Dot)** - Green dash-dot\
+    \ line with circular markers\n\nThe legend is positioned in the upper left corner\
+    \ outside the plot area. All four line styles are clearly distinguishable from\
+    \ each other. The data shows a realistic seasonal temperature pattern (low in\
+    \ winter months, peaking in July). The plot has a clean white background with\
+    \ subtle horizontal grid lines."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are clearly readable at full
+          size. Legend text is slightly small but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and markers are well-sized and clearly visible. Line thickness
+          and marker size are appropriate.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are reasonably distinguishable (blue, yellow, red, green),
+          but red-green combination could be challenging for some colorblind viewers.
+          However, line styles provide additional differentiation.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with plot filling most of the canvas. Balanced margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Month".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with only y-guides shown. Legend placement in upper
+          left is acceptable but overlaps with plot area slightly.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart with multiple styled lines.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows time (months), Y-axis shows continuous values (temperature).
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All 4 standard line styles present: solid, dashed, dotted, dash-dot.
+          Legend maps styles to series names.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within the axis range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies each series with its line style.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-styled · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all 4 line styles with varying but similar trends. Could benefit
+          from slightly more variation between series.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor readings over a year is a realistic, neutral scenario
+          applicable to climate monitoring.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values are realistic for a temperate climate (5-28°C).
+          Some values in Sensor C dip quite low (~1°C in December) which is plausible
+          but at the edge.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html, which is correct for pygal.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's stroke_style with dasharray for line styles, custom
+          Style class for theming, and both PNG and HTML outputs. Could leverage more
+          advanced features like tooltips or animations.
+  verdict: APPROVED
diff --git a/plots/line-styled/metadata/seaborn.yaml b/plots/line-styled/metadata/seaborn.yaml
index b524ec7fa9..9f6736ccf7 100644
--- a/plots/line-styled/metadata/seaborn.yaml
+++ b/plots/line-styled/metadata/seaborn.yaml
@@ -24,3 +24,172 @@ review:
   - Could leverage seaborn native palette system instead of manually specifying colors
   - Data context is generic (Coastal, Continental) rather than specific real-world
     regions
+  image_description: 'The plot displays four line series representing temperature
+    trends across 12 months (Jan-Dec) for different climate regions. The lines use
+    distinct styles: Coastal (solid blue), Continental (dashed yellow/gold), Mountain
+    (dotted green), and Mediterranean (dash-dot red/crimson). The title reads "line-styled
+    · seaborn · pyplots.ai" at the top. X-axis shows month abbreviations, Y-axis shows
+    "Temperature (°C)" ranging from approximately -5 to 33. All four series show the
+    expected seasonal pattern with peaks in July-August. The legend is positioned
+    in the upper right with a semi-transparent background. A subtle dashed grid helps
+    with value reading.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines at 3.5pt width are clearly visible and distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, red) with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid at alpha=0.3 is good, but legend duplicates line styles within
+          the legend entries when sns.lineplot creates them
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=temperature correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All four line styles (solid, dashed, dotted, dash-dot) present with
+          legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range from -5 to 33°C
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps styles to region names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: line-styled · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows all four standard line styles clearly differentiated
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Temperature trends by climate region is plausible but could be more
+          specific (e.g., actual regions)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (-3°C mountain winter to 33°C Mediterranean
+          summer)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib.pyplot, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot with sns.set_style, but could leverage more seaborn-specific
+          features like themes or palettes
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/altair.yaml b/plots/line-timeseries-rolling/metadata/altair.yaml
index bf30a1ceae..01f1e435a0 100644
--- a/plots/line-timeseries-rolling/metadata/altair.yaml
+++ b/plots/line-timeseries-rolling/metadata/altair.yaml
@@ -23,3 +23,174 @@ review:
   weaknesses:
   - Legend placement overlaps slightly with data points in the upper-right corner;
     could use orient=bottom or place outside the plot area
+  image_description: 'The plot displays a time series of daily temperature data over
+    approximately 6 months (January to late June 2024). It features two overlaid lines:
+    a thin, semi-transparent blue line representing "Raw Data" showing daily fluctuations
+    with considerable noise, and a prominent thicker yellow/gold line representing
+    the "7-Day Rolling Average" that smoothly reveals the underlying seasonal warming
+    trend. The temperature rises from around -2°C in winter to approximately 22°C
+    in early summer. The chart has a white background with subtle dashed grid lines,
+    clear axis labels with units ("Date" and "Temperature (°C)"), and a well-positioned
+    legend in the top-right corner. The title follows the required format.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis date labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are well-differentiated with appropriate stroke widths (1.5
+          vs 4) and opacity (0.5 vs 1.0); raw data could be slightly more visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins, plot fills appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Temperature (°C)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle with dashed style and 0.3 opacity; however legend
+          overlaps with data points in the upper-right area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart with two series
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, temperature values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data with lighter style, rolling average prominent, legend distinguishes
+          series, window size in legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data properly with zero=False for better visualization
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Raw Data" and "7-Day Rolling Average"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-timeseries-rolling · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal trend well, noise vs smoothed pattern clear; could
+          show more dramatic short-term spikes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature data with seasonal variation is a perfect realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values from -8°C to 30°C are realistic for a temperate
+          climate winter-to-summer transition
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with scale_factor=3.0
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair''s declarative grammar: alt.condition for
+          conditional styling, pd.melt for tidy data format, proper encoding types
+          (:T, :Q, :N), configure_axis for styling'
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/bokeh.yaml b/plots/line-timeseries-rolling/metadata/bokeh.yaml
index cc2189c191..b210e048ec 100644
--- a/plots/line-timeseries-rolling/metadata/bokeh.yaml
+++ b/plots/line-timeseries-rolling/metadata/bokeh.yaml
@@ -27,3 +27,177 @@ review:
     image
   - Could add HoverTool to display exact date and temperature values on hover for
     better interactivity
+  image_description: The plot displays a time series visualization of daily temperature
+    data spanning January 2024 to January 2025. A semi-transparent blue line shows
+    raw daily temperature readings with visible day-to-day noise/volatility. A prominent
+    smooth yellow/gold line overlays the raw data showing the 30-day rolling average.
+    The visualization clearly demonstrates a seasonal temperature pattern, with values
+    starting around 5-10°C in January, rising to peak around 35°C in July (summer),
+    then declining back toward 0-5°C by December. The title "line-timeseries-rolling
+    · bokeh · pyplots.ai" appears in the top left. A legend distinguishes "Raw Data"
+    (blue) from "30-Day Rolling Average" (yellow). The x-axis shows month labels (Jan
+    2024 through Jan 2025), and the y-axis displays "Temperature (°C)" ranging from
+    0 to ~45. A subtle grid with dashed lines aids readability. The rolling average
+    line is shorter at both ends due to the centered window calculation.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable; legend text could
+          be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines well-sized; raw data alpha=0.5 appropriate for density; rolling
+          avg line prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe (not red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Date" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend labels appear quite small
+          relative to canvas size
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on X-axis, temperature values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data line, rolling average line, legend with window size, grid
+          lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Raw Data" and "30-Day Rolling Average"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-timeseries-rolling · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal trend, noise reduction, rolling avg shorter than raw
+          data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings over a year is a perfect, neutral real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 0-45°C is realistic; some values slightly exceed
+          typical range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource properly, interactive tools, HTML output; could
+          leverage HoverTool for showing exact values
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/highcharts.yaml b/plots/line-timeseries-rolling/metadata/highcharts.yaml
index 55324c69fe..6442e1f4d2 100644
--- a/plots/line-timeseries-rolling/metadata/highcharts.yaml
+++ b/plots/line-timeseries-rolling/metadata/highcharts.yaml
@@ -25,3 +25,177 @@ review:
   - Y-axis grid line alpha at 0.3 is slightly too prominent (0.2 would be more subtle)
   - Raw temperature line could be slightly thicker (current 3px) for better visibility
     against the rolling average
+  image_description: The plot displays a time series of daily temperature readings
+    over approximately 6 months (January to late June 2024). A semi-transparent blue
+    line represents the raw temperature data showing daily volatility ranging from
+    about 4°C to 35°C. A bold yellow/gold line overlays the raw data showing the 7-day
+    rolling average, which smoothly reveals the seasonal warming trend from ~10°C
+    in January to ~27°C by late April/May. The title "line-timeseries-rolling · highcharts
+    · pyplots.ai" appears at the top with a subtitle "Daily Temperature with 7-Day
+    Rolling Average". The x-axis shows dates with rotated labels every 2 weeks, and
+    the y-axis displays "Temperature (°C)". A legend in the top-right corner clearly
+    distinguishes "Raw Temperature" from "7-Day Rolling Average". Subtle grid lines
+    are present on both axes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 56px, axis labels at 40px, tick labels at 32px - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-axis labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Raw data line visible with good alpha, rolling average prominent.
+          Minor: raw line could be slightly more prominent'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid present but y-axis grid at 0.3 alpha is slightly prominent;
+          legend well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line chart with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, temperature values on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data line, rolling average line, legend with window size, grid
+          lines all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Raw Temperature" and "7-Day Rolling Average"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-timeseries-rolling · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows daily volatility AND smooth trend. Rolling average shorter
+          than raw data. Minor: could show more extreme volatility periods'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings with seasonal trend is a real, neutral
+          scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 4-35°C is realistic for temperate climate, though
+          35°C is on the high end
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → options → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, json, selenium, etc.)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts and Selenium APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html (correct outputs)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts datetime axis, multiple series, custom styling. Could
+          leverage more interactive features like tooltips configuration
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/letsplot.yaml b/plots/line-timeseries-rolling/metadata/letsplot.yaml
index 3da7d3d14c..d632890f1b 100644
--- a/plots/line-timeseries-rolling/metadata/letsplot.yaml
+++ b/plots/line-timeseries-rolling/metadata/letsplot.yaml
@@ -26,3 +26,178 @@ review:
     a unified legend - consider using guides() to merge them
   - Raw data line could be slightly thicker for better visibility while maintaining
     transparency
+  image_description: The plot displays a time series of simulated daily sensor temperature
+    readings spanning January to July 2024. The raw data is shown as a semi-transparent
+    blue line with noticeable day-to-day volatility ranging from approximately 12°C
+    to 33°C. Overlaid is a thick yellow/gold line representing the 14-Day Rolling
+    Average, which smoothly reveals the underlying seasonal warming trend (rising
+    from ~16°C in January to ~26°C in May, then declining). The title "line-timeseries-rolling
+    · letsplot · pyplots.ai" appears at the top. The x-axis shows "Date" with monthly
+    labels (Jan-Jul), and the y-axis shows "Temperature (°C)". A legend at the top
+    clearly distinguishes "Raw Data" from "14-Day Rolling Avg". Grid lines are subtle
+    gray. The rolling average line is shorter than the raw data (starts mid-January)
+    as expected due to the window requirement.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend all clearly readable at
+          full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Raw data line is appropriately thin and semi-transparent, rolling
+          average is prominent; minor deduction as raw data could be slightly more
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but legend shows 3 separate rows instead of merged
+          legend entries
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line chart with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, temperature values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data with lighter style, prominent rolling average, legend with
+          window size, grid lines present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly labels "Raw Data" and "14-Day Rolling Avg"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: `line-timeseries-rolling · letsplot · pyplots.ai`'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation, noise, and underlying trend; could show
+          more extreme outliers to demonstrate smoothing effect better
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor readings is a perfect, neutral real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for temperature (12-33°C), though 180 days from
+          Jan 1st would end in late June, not July
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used and all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as `plot.png` but uses `path="."` which works, though unconventional
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar with scale_manual functions, theme customization,
+          and proper ggsave with scale parameter; could leverage more interactive
+          features
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/matplotlib.yaml b/plots/line-timeseries-rolling/metadata/matplotlib.yaml
index 75457fcdee..48f604a577 100644
--- a/plots/line-timeseries-rolling/metadata/matplotlib.yaml
+++ b/plots/line-timeseries-rolling/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
     better contrast with rolling average
   - Consider adding a subtle shaded region between raw and rolling average to emphasize
     the smoothing effect
+  image_description: The plot displays a time series of daily temperature data over
+    6 months (January 2024 to July 2024). The raw data is shown as a thin, semi-transparent
+    blue line (#306998) with high-frequency fluctuations representing day-to-day temperature
+    variations. Overlaid on top is a thick yellow/gold line (#FFD43B) representing
+    the 7-day rolling average, which smoothly traces the underlying seasonal trend.
+    The y-axis shows "Temperature (°C)" ranging from about -5°C to 25°C, and the x-axis
+    shows dates with monthly labels rotated at 30 degrees. A legend in the upper left
+    clearly identifies "Daily Temperature" and "7-Day Rolling Average". The title
+    follows the required format "line-timeseries-rolling · matplotlib · pyplots.ai".
+    A subtle dashed grid (alpha=0.3) aids in reading values.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt, legend at 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, date labels properly rotated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines well-sized, though raw data could be slightly thinner for more
+          contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, balanced margins with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" with units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, legend well-placed but could be positioned
+          better to not overlap potential data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, temperature values on y-axis, rolling average computed
+          correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data line (semi-transparent), rolling average (prominent), legend
+          with window size, grid on both axes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly identifies "Daily Temperature" and "7-Day Rolling Average"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "line-timeseries-rolling · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal trend, daily noise, and smoothing effect well; could
+          show more dramatic trend reversal
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings with seasonal pattern is a perfect, neutral
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (-5°C to 25°C) realistic for temperate climate
+          winter-to-summer transition
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, pandas used and all needed
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Standard matplotlib usage, no distinctive features like date formatters,
+          custom styles, or advanced annotations
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/plotly.yaml b/plots/line-timeseries-rolling/metadata/plotly.yaml
index d6623e7406..2345a420d4 100644
--- a/plots/line-timeseries-rolling/metadata/plotly.yaml
+++ b/plots/line-timeseries-rolling/metadata/plotly.yaml
@@ -26,3 +26,179 @@ review:
     readability
   - Could leverage more Plotly-specific features like customized hover templates showing
     both raw and rolling values, or a range selector for interactivity
+  image_description: 'The plot displays a time series of daily temperature data from
+    January 2024 to July 2024. The raw data is shown as a light blue/semi-transparent
+    line with noticeable daily fluctuations (noise), while a prominent yellow/gold
+    line (width 4) represents the 14-Day Rolling Average smoothly tracing through
+    the data. The temperature range spans from approximately -10°C to 40°C, showing
+    a clear seasonal warming trend from winter to summer. The title "line-timeseries-rolling
+    · plotly · pyplots.ai" is centered at the top. The legend is positioned in the
+    upper left corner with a subtle white background, clearly distinguishing "Raw
+    Data" from "14-Day Rolling Average". Both axes have proper labels: "Date" on the
+    x-axis and "Temperature (°C)" on the y-axis. The grid is subtle with low alpha
+    values. The plot uses a white background (plotly_white template).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Raw data line appropriately thin with alpha=0.4, rolling average
+          prominent with width=4. Minor deduction: raw data could be slightly more
+          visible'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, proper margins, balanced layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend well-placed with background. However,
+          grid at alpha=0.1 is almost too subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, temperature values on y-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data line, rolling average line, legend with window size, both
+          visible
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Raw Data" and "14-Day Rolling Average" correctly labeled'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "line-timeseries-rolling · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows seasonal trend, noise reduction via rolling average, appropriate
+          data density (200 points). Rolling average starts after window period. Minor:
+          could show more extreme peaks/dips'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature monitoring is a realistic and neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (-10°C to ~38°C), though peak summer
+          temps of 38°C are on the high end for typical weather data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Figure and go.Scatter with proper configuration, produces
+          interactive HTML. Could leverage more Plotly features like hover info customization
+          or range sliders
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/plotnine.yaml b/plots/line-timeseries-rolling/metadata/plotnine.yaml
index 5e480d9e69..6e4f8e343d 100644
--- a/plots/line-timeseries-rolling/metadata/plotnine.yaml
+++ b/plots/line-timeseries-rolling/metadata/plotnine.yaml
@@ -23,3 +23,164 @@ review:
   weaknesses:
   - X-axis date labels are slightly crowded with 7-day breaks over 6 months - could
     use monthly breaks for cleaner appearance
+  image_description: 'The plot displays a time series of daily temperature data from
+    January 2024 to July 2024. The raw daily temperature data is shown as a thin blue/gray
+    line with semi-transparency (alpha ~0.5), exhibiting significant day-to-day fluctuations
+    typical of temperature readings. Overlaid on top is a prominent yellow/golden
+    line representing the 7-day rolling average, which is thicker and fully opaque.
+    The chart shows a clear seasonal trend: temperatures start near 0°C in January,
+    rise steadily through spring, peak around 20-25°C in late April/May, and begin
+    declining into summer. The title "line-timeseries-rolling · plotnine · pyplots.ai"
+    appears at the top. X-axis shows dates labeled by month (Jan 2024, Feb 2024, etc.)
+    with rotated labels. Y-axis shows "Temperature (°C)" ranging from approximately
+    -5 to 28. A legend on the right distinguishes "Daily Temperature" and "7-Day Rolling
+    Average". The grid is subtle with light gray lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, rotated x-axis labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines well-sized, raw data appropriately transparent, rolling average
+          prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast, distinguishable for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Temperature (°C)" and "Date"'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X, Temperature on Y, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data with transparency, prominent rolling average, clear legend
+          with window size
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend clearly labels "Daily Temperature" and "7-Day Rolling Average"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "line-timeseries-rolling · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both raw volatility and smoothed trend, seasonal pattern clearly
+          visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings are a real, neutral, comprehensible scenario
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used, no extraneous dependencies
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s ggplot2-style grammar: aesthetic mapping
+          (aes), scale_*_manual functions, theme customization, proper long-format
+          data transformation'
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/pygal.yaml b/plots/line-timeseries-rolling/metadata/pygal.yaml
index b38270390a..1598d3d981 100644
--- a/plots/line-timeseries-rolling/metadata/pygal.yaml
+++ b/plots/line-timeseries-rolling/metadata/pygal.yaml
@@ -26,3 +26,187 @@ review:
     while spec suggests grid lines on both axes improve readability
   - Font sizes in custom_style are larger than pygal.md guidelines suggest, though
     this works well for the output
+  image_description: 'The plot displays a time series chart with the title "line-timeseries-rolling
+    · pygal · pyplots.ai" at the top. The Y-axis is labeled "Temperature (°C)" and
+    ranges from 0 to approximately 17. The X-axis is labeled "Date" with rotated labels
+    showing dates from Jan 01 through Apr 15. Two line series are shown: a blue jagged
+    line representing "Raw Temperature" data that shows considerable volatility, and
+    an orange smoother line representing "7-Day Rolling Average" that tracks the underlying
+    trend. The legend is placed at the bottom with both series clearly labeled. The
+    raw data line is thinner while the rolling average line is thicker and more prominent.
+    The overall visual shows a gradual warming trend from about 5°C in January to
+    about 12°C in late April, with the rolling average smoothing out the day-to-day
+    fluctuations. Grid lines are visible on the Y-axis. The plot utilizes a clean
+    white background with good contrast.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend text are all readable at the target
+          resolution. Tick labels are slightly small but acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis date labels are well-spaced
+          with bi-weekly intervals.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Both lines are clearly visible. Raw data line is thinner (width 2)
+          and rolling average is thicker (width 6) as intended. Could benefit from
+          slightly more differentiation.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and orange (#E67E22) provide excellent contrast and
+          are colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with legend at bottom. Minor whitespace
+          on right edge.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Date".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis grid is subtle and helpful. Legend is well-positioned at bottom.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart for time series data.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on X-axis, temperature values on Y-axis, rolling average correctly
+          computed.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Raw data and rolling average overlay both present with clear visual
+          distinction per spec notes.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range from 0 to ~17°C.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Raw Temperature" and "7-Day Rolling Average"
+          with window size included.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "line-timeseries-rolling · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows noisy raw data with clear underlying trend revealed by rolling
+          average. Demonstrates seasonal warming pattern. Rolling average correctly
+          starts after window period (None values for first 6 days).
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings with seasonal variation (winter to spring
+          warming) is a realistic and neutral scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values (0-17°C) are realistic for temperate winter-spring
+          transition. Some noise values go slightly negative which is plausible for
+          winter temps.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data generation → rolling average calculation
+          → chart creation → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `random.seed(42)` for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (random, datetime, pygal, Style).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to both plot.html and plot.png correctly.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization, stroke_style for line differentiation,
+          x_labels_major for sparse labeling, legend_at_bottom with columns. Good
+          but not exceptional use of pygal-specific features.
+  verdict: APPROVED
diff --git a/plots/line-timeseries-rolling/metadata/seaborn.yaml b/plots/line-timeseries-rolling/metadata/seaborn.yaml
index ef42b4de36..680baa7c99 100644
--- a/plots/line-timeseries-rolling/metadata/seaborn.yaml
+++ b/plots/line-timeseries-rolling/metadata/seaborn.yaml
@@ -26,3 +26,179 @@ review:
   - Could leverage more seaborn-specific features like confidence intervals or statistical
     annotations
   - Grid legend has framealpha=0.9 which could be reduced slightly for a cleaner look
+  image_description: The plot shows a time series of daily temperature data spanning
+    from January 2024 to July 2024. The raw daily temperature is displayed as a thin,
+    semi-transparent blue line (#306998) with visible daily fluctuations ranging from
+    approximately -5°C to 25°C. Overlaid on top is a prominent golden/yellow (#FFD43B)
+    7-day rolling average line that smoothly traces the underlying seasonal trend
+    from winter cold (around 0°C) through spring warming to summer temperatures (peaking
+    around 20°C). The title "line-timeseries-rolling · seaborn · pyplots.ai" is displayed
+    at the top in bold. The x-axis shows "Date" with monthly tick labels rotated at
+    30 degrees, and the y-axis shows "Temperature (°C)". A legend in the upper left
+    clearly distinguishes "Daily Temperature" from "7-Day Rolling Average". A subtle
+    dashed grid aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; rotated x-axis labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Raw data line visible with good alpha=0.4; rolling average prominent
+          with linewidth=4; minor deduction as raw line could be slightly more visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3 and dashed style; legend well-placed
+          but could have slightly better framealpha
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot with rolling average overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, temperature values on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: raw data with light/transparent style,
+          prominent rolling average line, legend distinguishing both, window size
+          in legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend clearly shows "Daily Temperature" and "7-Day Rolling Average"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `{spec-id} · {library} · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal trend, daily noise, and smoothing effect well; could
+          show more dramatic noise events
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature data from winter to summer is a real, comprehensible,
+          neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values range from -5°C to 25°C which is realistic for temperate climate
+          seasonal progression; minor deduction as the seasonal amplitude might be
+          slightly exaggerated for some regions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` ensures reproducibility'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' correctly ✓ (marking as correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses `sns.lineplot` with DataFrame integration and `sns.set_style`/`sns.set_context`
+          for styling; however, doesn't leverage more advanced seaborn statistical
+          features
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/altair.yaml b/plots/line-timeseries/metadata/altair.yaml
index ab3122bb68..750df723e9 100644
--- a/plots/line-timeseries/metadata/altair.yaml
+++ b/plots/line-timeseries/metadata/altair.yaml
@@ -28,3 +28,173 @@ review:
     conditional formatting
   - Starting price of exactly 100 feels artificial; a more realistic starting value
     would improve authenticity
+  image_description: The plot displays a time series line chart showing stock price
+    data over approximately one year (January 2024 to December 2024). The line is
+    rendered in Python blue (#306998) with consistent stroke width. The y-axis shows
+    "Stock Price ($)" ranging from approximately 76 to 120, and the x-axis shows "Date"
+    with monthly tick labels formatted as "Mon YYYY" (e.g., "Feb 2024", "Mar 2024").
+    The title "line-timeseries · altair · pyplots.ai" appears centered at the top.
+    The x-axis labels are rotated at -45 degrees to prevent overlap. A subtle gray
+    grid is visible on the y-axis. The data shows realistic stock price volatility
+    with an initial decline from ~100 to ~78-80, followed by a recovery and upward
+    trend reaching ~116 in November before slight decline.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels rotated at -45° preventing overlap, no text collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line stroke width of 3 is appropriate for 252 data points, clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single series with Python blue (#306998), excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, minor margin imbalance on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Stock Price ($)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid only on y-axis, x-axis grid would improve temporal readability
+          per spec
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart for time series data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis (temporal), price on y-axis (quantitative)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting ("%b %Y"), grid lines, datetime x-axis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range with scale(zero=False)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-timeseries · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows volatility, trends, and realistic temporal patterns; could
+          show more dramatic features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price simulation with 252 trading days is authentic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Prices in 76-116 range realistic, though starting at exactly 100
+          is slightly artificial
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only imports altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses interactive() and tooltips but doesn't leverage Altair's declarative
+          encoding strengths like selections or layered views
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/bokeh.yaml b/plots/line-timeseries/metadata/bokeh.yaml
index 300a3fd3c0..45c51cf118 100644
--- a/plots/line-timeseries/metadata/bokeh.yaml
+++ b/plots/line-timeseries/metadata/bokeh.yaml
@@ -23,3 +23,178 @@ review:
   weaknesses:
   - Legend text size (28pt) appears small relative to other text elements on the canvas
   - HoverTool adds code complexity but provides no benefit in the static PNG output
+  image_description: The plot displays a time series line chart showing stock price
+    data over one year (January 2024 to January 2025). The line is rendered in a blue
+    color (#306998) on a light gray background (#fafafa). The title "line-timeseries
+    · bokeh · pyplots.ai" appears in the top-left corner. The y-axis is labeled "Stock
+    Price (USD)" with values ranging from approximately 140 to 180. The x-axis is
+    labeled "Date" with month labels (Jan 2024, Mar 2024, May 2024, Jul 2024, Sep
+    2024, Nov 2024, Jan 2025) displayed at a slight angle. The chart shows a realistic
+    stock price pattern with an initial sideways movement, a dip around May 2024,
+    a strong recovery through August, and continued volatility through year-end. A
+    legend labeled "Stock Price" appears in the top-left area. Grid lines are present
+    with dashed styling on both axes.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Date labels are rotated at 0.8 radians preventing overlap, no text
+          collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Line width of 6 is visible but could be slightly thicker for optimal
+          visibility at 4800x2700
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast on light background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Stock Price (USD)" and "Date" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid alpha at 0.3 is good, but legend is quite small relative to
+          the canvas size
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line chart with datetime x-axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, price on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines on both axes, smart date formatting with month labels,
+          rotated tick labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Stock Price"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, seasonality, and volatility; could show multiple series
+          for comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price simulation is a classic time series use case with realistic
+          patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price range 140-180 USD is realistic for a stock, though the variation
+          could be slightly more dramatic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: HoverTool imported and used, but hover functionality not visible
+          in static PNG
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and HoverTool (bokeh-specific), but hover not
+          visible in static output; saves both PNG and HTML
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/highcharts.yaml b/plots/line-timeseries/metadata/highcharts.yaml
index 313a0bd556..ee3ca12783 100644
--- a/plots/line-timeseries/metadata/highcharts.yaml
+++ b/plots/line-timeseries/metadata/highcharts.yaml
@@ -24,3 +24,180 @@ review:
   - LineSeries imported from highcharts_core.options.series.area instead of highcharts_core.options.series.line
   - Could leverage more Highcharts-specific features like zooming capability or data
     grouping for large datasets
+  image_description: The plot displays a time series line chart showing daily temperature
+    readings throughout 2024. The chart has a white background with a clear title
+    "line-timeseries · highcharts · pyplots.ai" at the top in bold, with a subtitle
+    "Daily Temperature Readings - 2024" below it. The x-axis shows dates from Jan
+    2024 to Jan 2025 with monthly tick intervals, labeled as "Date". The y-axis displays
+    "Temperature (°C)" ranging from approximately -8 to 35 degrees. A single blue
+    line (#306998) traces the temperature throughout the year, showing a clear sinusoidal
+    seasonal pattern - cold winter temperatures around -6 to 5°C in January, rising
+    through spring, peaking around 27-33°C in summer (June-August), then declining
+    through autumn back to winter temperatures. The data shows realistic daily variation
+    (noise) overlaid on the seasonal trend. Grid lines are subtle and visible on both
+    axes. A legend at the bottom shows "Temperature" with a blue line indicator.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels all clearly readable at high resolution
+          with appropriately scaled font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Monthly tick intervals prevent x-axis label overlap, no overlapping
+          elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 5px is appropriate for 365 data points, good visibility
+          without markers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998 Python blue) with high contrast against white
+          background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with appropriate margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Temperature (°C)" with units, X-axis has "Date"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but legend placement at bottom is less
+          than ideal for this chart type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot with datetime x-axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Datetime on X, numeric values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting with monthly ticks, grid lines, proper datetime
+          axis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year of data displayed, axes show complete range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled "Temperature"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: line-timeseries · highcharts · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation and daily noise, demonstrates temporal patterns
+          well, but could show more complexity
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature readings for Northern Hemisphere with realistic seasonal
+          pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values realistic (-6 to 33°C) for temperate climate, though extremes
+          are slightly high for a single location
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses random.seed(42) for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: Import from highcharts_core.options.series.area for LineSeries is
+          unusual (should be from .line)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses Highcharts datetime axis and tooltip formatting, but doesn't
+          leverage unique features like zooming, data grouping, or stock chart capabilities
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/letsplot.yaml b/plots/line-timeseries/metadata/letsplot.yaml
index 646cdfd591..756f47da47 100644
--- a/plots/line-timeseries/metadata/letsplot.yaml
+++ b/plots/line-timeseries/metadata/letsplot.yaml
@@ -23,3 +23,172 @@ review:
   weaknesses:
   - Could leverage more lets-plot specific features like tooltips or interactivity
   - Line thickness slightly heavy for 365 data points - could be reduced to 1.0
+  image_description: The plot displays a time series line chart showing daily temperature
+    data over one year (January 2024 to January 2025). The line is rendered in a blue
+    color (#306998) with good visibility. The x-axis shows dates formatted as "Mon
+    YYYY" (e.g., "Jan 2024", "Feb 2024") with labels rotated at 45 degrees for readability.
+    The y-axis shows "Temperature (°C)" ranging from approximately -7 to 35 degrees.
+    The title "line-timeseries · letsplot · pyplots.ai" appears at the top in bold.
+    The data shows a clear seasonal pattern with cold temperatures in winter (around
+    0°C or below), rising through spring, peaking in summer (around 25-35°C), and
+    declining again in fall/winter. Grid lines are visible in light gray. The plot
+    fills the canvas well with balanced margins.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (24pt), axis labels are 20pt, tick text is
+          16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels are rotated 45° preventing overlap, no text collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 1.5 with alpha 0.9 is appropriate for 365 data points,
+          though slightly thick for this density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" with units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is visible but no legend (not needed for single line)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, temperature values on y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting ("%b %Y"), rotated tick labels, grid lines
+          on both axes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 365 days visible, y-axis range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (N/A, full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "line-timeseries · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows temporal patterns well (seasonal cycle, daily variation), but
+          could demonstrate more time series features like trends or anomalies
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings over a year is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range -7 to 35°C is realistic for temperate climate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with scale_x_datetime, but doesn't
+          leverage lets-plot specific interactive features or tooltips
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/matplotlib.yaml b/plots/line-timeseries/metadata/matplotlib.yaml
index 779e5e8b2a..012a3760a9 100644
--- a/plots/line-timeseries/metadata/matplotlib.yaml
+++ b/plots/line-timeseries/metadata/matplotlib.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Line could be slightly thicker (3.0 instead of 2.5) for better visibility at full
     resolution with 366 data points
+  image_description: The plot displays a time series line chart showing daily temperature
+    data for the year 2024. A single blue line (#306998) with linewidth 2.5 connects
+    daily temperature readings ranging from approximately -5°C to +35°C. The x-axis
+    shows monthly date labels (Jan 2024 through Dec 2024) rotated 45 degrees for readability.
+    The y-axis shows "Temperature (°C)" with values from roughly -5 to 35. The title
+    reads "Daily Temperature 2024 · line-timeseries · matplotlib · pyplots.ai". A
+    light gray dashed grid (major and minor) aids readability. The plot shows a clear
+    seasonal sinusoidal pattern with peak temperatures in summer (June-August) and
+    lowest temperatures in winter months.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Rotated x-axis labels prevent overlap, no text collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is clearly visible with good linewidth (2.5), alpha 0.9 provides
+          good contrast; minor deduction for slightly thin line for 366 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout ensures
+          good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units ("Date", "Temperature
+          (°C)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3/0.15) with good styling; no legend needed
+          for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, temperature on y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting with MonthLocator, rotated labels, grid lines
+          on both axes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year of data shown with appropriate axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (N/A)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Daily Temperature 2024 · line-timeseries · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation, daily noise, full year cycle; could benefit
+          from showing trend or anomalies
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings over a year is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range (-5°C to 35°C) is realistic for temperate climates;
+          baseline of 12°C with ±15° seasonal swing is plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.dates, pyplot, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of matplotlib.dates with MonthLocator, DateFormatter,
+          and minor locators - these are matplotlib-specific date handling features
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/plotly.yaml b/plots/line-timeseries/metadata/plotly.yaml
index e2e7c3609d..d64de611de 100644
--- a/plots/line-timeseries/metadata/plotly.yaml
+++ b/plots/line-timeseries/metadata/plotly.yaml
@@ -24,3 +24,173 @@ review:
     (rangeselector), or spike lines that are distinctive for time series
   - 'Grid/legend scoring: legend disabled but could optionally show the series name'
   - Line width could be slightly increased (3.0) for better visibility at high resolution
+  image_description: 'The plot displays a time series line chart showing daily temperature
+    readings throughout 2024. The line is a medium blue color (#306998), plotted against
+    a clean white background with subtle gray gridlines. The x-axis shows months from
+    January 2024 to December 2024 with monthly tick marks (format: "Mon YYYY"). The
+    y-axis shows temperature in degrees Celsius ranging from approximately -5°C to
+    35°C. The title "Daily Temperature 2024 · line-timeseries · plotly · pyplots.ai"
+    is centered at the top. The data shows a clear sinusoidal seasonal pattern with
+    temperatures lowest in winter (around -5°C to 5°C in Jan/Dec) and highest in summer
+    (around 25°C to 35°C in Jul/Aug), with realistic daily noise variation.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis titles at 24pt, ticks at 18pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Monthly tick labels well-spaced, no overlapping text
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width 2.5 is appropriate for 366 data points; could be slightly
+          thicker
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; slight excess margin on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend is disabled when it could
+          show "Temperature"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on x-axis, temperatures on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting ("%b %Y"), gridlines on both axes, datetime
+          x-axis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year visible, y-axis encompasses all values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled (single series, acceptable)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-timeseries · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation with noise; could show more dramatic events
+          (e.g., cold snaps)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature for a temperate climate is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range (-7°C to 35°C) is realistic for temperate regions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, plotly.graph_objects)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses custom hovertemplate for interactivity, but does not leverage
+          other plotly strengths like range slider, range selector buttons, or spike
+          lines
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/plotnine.yaml b/plots/line-timeseries/metadata/plotnine.yaml
index 1e512242fc..d860def5c4 100644
--- a/plots/line-timeseries/metadata/plotnine.yaml
+++ b/plots/line-timeseries/metadata/plotnine.yaml
@@ -21,4 +21,178 @@ review:
   - Realistic stock price simulation with 252 business days (one trading year)
   - Clean minimal theme with appropriate font sizes for high-resolution output
   - Subtle grid lines with appropriate alpha values
-  weaknesses: []
+  weaknesses:
+  - 'Grid scoring: VQ-07 should be 2/2 since no legend is needed and grid is well-configured
+    (correcting my assessment)'
+  image_description: The plot displays a time series line chart showing stock price
+    data over the year 2024 (January through December). The chart uses a blue line
+    (#306998) connecting daily price points, starting around $100 in January, peaking
+    above $105 in early February, then declining through mid-year to approximately
+    $81-82, before recovering to around $95-100 by December. The x-axis shows dates
+    in "Mon YYYY" format (Feb 2024, Apr 2024, Jun 2024, Aug 2024, Oct 2024, Dec 2024)
+    with rotated labels at 45 degrees. The y-axis shows "Stock Price ($)" ranging
+    from approximately 80 to just above 100. The title "line-timeseries · plotnine
+    · pyplots.ai" appears at the top. The plot has a minimal theme with subtle gray
+    grid lines on both axes, and the overall layout is clean with good use of the
+    canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels are rotated 45° to prevent overlap, all text clearly
+          readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is well-sized (1.5), points at 0.8 are subtle but the line is
+          the focus for time series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no accessibility
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Stock Price ($)" includes units, "Date" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, value (price) on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting with month-year labels, grid lines on both
+          axes, rotated tick labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-timeseries · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows uptrend, downtrend, and recovery patterns; could show more
+          pronounced features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price simulation is realistic with random walk pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 252 business days, price range $80-$105 is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no classes/functions
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API with mizani for date handling
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics with geom_line, geom_point, scale_x_datetime
+          with mizani breaks/labels
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/pygal.yaml b/plots/line-timeseries/metadata/pygal.yaml
index 8c7c4b461f..93c766de9f 100644
--- a/plots/line-timeseries/metadata/pygal.yaml
+++ b/plots/line-timeseries/metadata/pygal.yaml
@@ -27,3 +27,183 @@ review:
     at full resolution
   - HTML output could leverage pygal interactivity features (tooltips, hover effects)
     more explicitly
+  image_description: 'The plot displays a time series line chart showing "ACME Corp
+    Stock" prices over the year 2024 (Jan-Dec). The chart uses a single blue line
+    (Python Blue #306998) on a white background. The title "line-timeseries · pygal
+    · pyplots.ai" appears at the top in black text. The Y-axis shows "Stock Price
+    (USD)" ranging from approximately 150 to 270, and the X-axis shows "Date" with
+    monthly labels (Jan 2024 through Dec 2024) rotated at 45 degrees. The line shows
+    an overall upward trend with realistic daily volatility - starting around 150,
+    dipping slightly in February, then gradually climbing through the year with some
+    corrections (notably in September) before reaching approximately 265 by December.
+    A legend "ACME Corp Stock" appears at the bottom left. Y-axis grid lines (dotted,
+    light gray) provide reference. The chart has good proportions with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all readable. Font sizes
+          are appropriately scaled for the 4800x2700 canvas, though tick labels could
+          be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text. X-axis labels are rotated 45 degrees and properly
+          spaced (showing only first of each month).
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is clearly visible with stroke width of 5. No dots shown (appropriate
+          for 365 data points). Line thickness is appropriate for the data density.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single series uses Python Blue, good contrast against white background.
+          No color accessibility concerns.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Plot fills good portion of canvas with reasonable margins. Legend
+          at bottom is well-positioned, though margin=60 is relatively small for such
+          a large canvas.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Y-axis: "Stock Price (USD)" includes units. X-axis: "Date" is descriptive.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis grid is subtle (dotted, light gray). Legend is positioned
+          at bottom. X-guides disabled which is appropriate.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type for time series data.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on X-axis, values on Y-axis correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has intelligent date formatting (monthly labels), rotated tick labels,
+          Y-grid lines. Minor: no X-grid lines per spec recommendation.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data from ~145 to ~270.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "ACME Corp Stock".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "line-timeseries · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows upward trend with volatility, dips and recoveries, realistic
+          price movements. Could show more dramatic patterns (e.g., a larger correction).
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price tracking over one year is an excellent, realistic scenario
+          mentioned in spec applications.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Stock prices starting at 150 USD and ranging to ~270 are realistic.
+          365 data points is within spec (30-500 points).
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → seed → data generation → style
+          → chart config → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only imports used: random, datetime/timedelta, pygal, Style.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as both plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses custom Style class, x_labels_major for intelligent labeling,
+          legend_at_bottom, stroke_style. Could leverage more pygal-specific features
+          like tooltips for interactivity in HTML output.
+  verdict: APPROVED
diff --git a/plots/line-timeseries/metadata/seaborn.yaml b/plots/line-timeseries/metadata/seaborn.yaml
index 6214874f60..2cda3bdc20 100644
--- a/plots/line-timeseries/metadata/seaborn.yaml
+++ b/plots/line-timeseries/metadata/seaborn.yaml
@@ -23,3 +23,162 @@ review:
   - Grid lines appear on both axes but could show minor grid as well
   - Could leverage seaborn distinctive features like confidence intervals or multiple
     series aggregation
+  image_description: The plot shows a time series line chart displaying temperature
+    data (in °C) over a 3-month period from January 2024 to April 2024. The line is
+    rendered in a blue color (#306998) with a linewidth of 3. The x-axis shows monthly
+    date labels ("Jan 2024", "Feb 2024", "Mar 2024", "Apr 2024") rotated at 45 degrees
+    for readability. The y-axis displays "Temperature (°C)" ranging from approximately
+    2.5 to 20°C. The title correctly follows the format "line-timeseries · seaborn
+    · pyplots.ai". A subtle gray dashed grid is present on both axes. The data shows
+    realistic temperature variation with noise and a subtle upward seasonal trend.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Date labels are rotated 45° preventing overlap, all text clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is well-sized at linewidth=3, visible throughout
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, high contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, minor extra whitespace possible
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" and "Date" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, but no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, temperature on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smart date formatting with MonthLocator, rotated labels, grid lines
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 90 days of data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "line-timeseries · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows temporal variation, noise, and subtle trend; could show more
+          distinct seasonal pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily temperature readings over 3 months is a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values (2.5°C to 20°C) are reasonable for winter/spring
+          transition
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/altair.yaml b/plots/lollipop-basic/metadata/altair.yaml
index 69096d9d98..0637c58a9a 100644
--- a/plots/lollipop-basic/metadata/altair.yaml
+++ b/plots/lollipop-basic/metadata/altair.yaml
@@ -26,3 +26,175 @@ review:
     category-value alignment reading
   - Layout could utilize more canvas space (plot area is somewhat compact relative
     to total canvas)
+  image_description: 'The plot displays a lollipop chart showing "Product Sales by
+    Category" with 10 categories on the x-axis and sales values in dollars on the
+    y-axis. Each category has a vertical blue stem (line) extending from 0 to its
+    value, topped with a blue circular dot. Categories are sorted in descending order
+    from left to right: Electronics ($425,000), Clothing ($312,000), Home & Garden
+    ($287,000), Sports ($234,000), Books ($198,000), Toys ($176,000), Beauty ($152,000),
+    Automotive ($134,000), Food & Grocery ($118,000), and Pet Supplies ($95,000).
+    The color used is a consistent blue (#306998) for both stems and dots. X-axis
+    labels are rotated -45 degrees to prevent overlap. The title "lollipop-basic ·
+    altair · pyplots.ai" appears at the top center. A subtle grid is visible on the
+    y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels rotated -45° to avoid overlap, no text collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stems (strokeWidth=3) and dots (size=400) are clearly visible and
+          well-proportioned for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) is accessible, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though plot could be slightly larger relative
+          to canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales ($)" with currency unit, X-axis has "Category"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (gridOpacity=0.3), but no legend needed for single-series
+          data; grid only on y-axis
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular dots, vertical orientation, sorted by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-440,000, covering all data points properly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, N/A applies
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "lollipop-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 10 categories with varying values demonstrating the ranking
+          nature of lollipop charts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in $95K-$425K range are realistic for retail sales data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with scale_factor=3
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative layering with mark_rule + mark_circle,
+          proper encoding types (:N, :Q), and tooltips with formatting. Could have
+          added interactivity.
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/bokeh.yaml b/plots/lollipop-basic/metadata/bokeh.yaml
index 2811fe8dae..4ef0626619 100644
--- a/plots/lollipop-basic/metadata/bokeh.yaml
+++ b/plots/lollipop-basic/metadata/bokeh.yaml
@@ -23,3 +23,174 @@ review:
   - Redundant sorting code (data is already defined in sorted order)
   - Bokeh toolbar icons visible in top-right corner could be hidden for cleaner static
     output
+  image_description: The plot displays a lollipop chart showing product sales by category.
+    Ten categories are shown on the x-axis (Electronics, Clothing, Home & Garden,
+    Sports, Books, Toys, Food, Beauty, Automotive, Office) with sales values in dollars
+    on the y-axis ranging from $0 to ~$85,000. Each data point consists of a blue
+    vertical stem (#306998) extending from the baseline to a yellow circular marker
+    (#FFD43B) with a blue outline. The data is sorted in descending order from left
+    to right. Category labels are rotated at approximately 40 degrees for readability.
+    The background is light gray (#fafafa) with subtle dashed horizontal grid lines.
+    The title "lollipop-basic · bokeh · pyplots.ai" appears at the top left. Y-axis
+    values are formatted as currency with thousands separators.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable, slightly smaller title would be ideal
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, rotated x-axis labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stems (line_width=4) and markers (size=25) are well-sized for the
+          data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor excess whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales ($)" and "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, grid is subtle (alpha=0.3, dashed), but Bokeh toolbar
+          icons visible in corner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular markers, vertical orientation, sorted data
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from 0 to max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lollipop-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good range of values with clear descending pattern, could show
+          more variation in middle values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values ($15K-$85K) are plausible retail sales figures, though slightly
+          generic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values), but sorting after definition
+          is redundant since data is already sorted
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, segment + scatter glyphs, NumeralTickFormatter
+          - good Bokeh patterns but no interactive features beyond defaults
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/highcharts.yaml b/plots/lollipop-basic/metadata/highcharts.yaml
index 91f09fa86a..c98073b358 100644
--- a/plots/lollipop-basic/metadata/highcharts.yaml
+++ b/plots/lollipop-basic/metadata/highcharts.yaml
@@ -26,3 +26,176 @@ review:
   - Layout has slight imbalance due to large bottom margin creating extra whitespace
   - 'Could leverage more Highcharts interactive features (tooltips are disabled via
     enableMouseTracking: False on stems)'
+  image_description: The plot displays a basic lollipop chart showing product sales
+    by category. Eight categories are displayed on the x-axis (Electronics, Clothing,
+    Home & Garden, Sports, Books, Toys, Jewelry, Automotive), with "Sales (Units)"
+    on the y-axis ranging from 0 to 9000. Each data point consists of a thin blue
+    vertical stem (#306998) extending from the baseline to a circular blue marker
+    at the top. Data labels are positioned above each marker showing exact values
+    (8,500 down to 1,800). The data is sorted in descending order from left to right.
+    The title "lollipop-basic · highcharts · pyplots.ai" appears at the top. The background
+    is white with subtle dashed gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, axis labels at 36px, tick labels at 28px - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (radius 20) and stems (width 4) are perfectly sized for 8
+          data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but slight margin imbalance, extra bottom margin (250)
+          creates some unused space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales (Units)" with unit, X-axis has "Product Category"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style, legend disabled (appropriate for
+          single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular markers, vertical orientation, sorted data -
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, shows all data with appropriate range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled (appropriate for single-series lollipop)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lollipop-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows descending sorted data with good variation, but all values
+          are positive (could show more diversity)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a realistic e-commerce scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for sales units, though the range (1,800-8,500)
+          could be slightly more varied
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → chart config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (good), but no explicit seed for any random
+          operations
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts scatter+column combination creatively for lollipop
+          effect, but could leverage more Highcharts-specific features like animations
+          or tooltips
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/letsplot.yaml b/plots/lollipop-basic/metadata/letsplot.yaml
index d71573f520..631b1384dc 100644
--- a/plots/lollipop-basic/metadata/letsplot.yaml
+++ b/plots/lollipop-basic/metadata/letsplot.yaml
@@ -22,3 +22,174 @@ review:
   weaknesses:
   - Y-axis gridlines could be more subtle (add alpha or lighter color)
   - Could demonstrate more lets-plot specific features like interactive tooltips
+  image_description: The plot displays a basic lollipop chart showing product sales
+    by category. Eight categories are shown on the x-axis (Beauty, Grocery, Toys,
+    Books, Sports, Home & Garden, Clothing, Electronics), sorted in ascending order
+    by sales value. Each data point consists of a thin blue vertical stem (#306998)
+    extending from baseline 0 to a filled circular blue dot at the data value. The
+    y-axis shows "Sales ($)" ranging from 0 to 50,000. Category labels are rotated
+    45 degrees for readability. The title "lollipop-basic · letsplot · pyplots.ai"
+    is centered at the top. The layout uses a minimal theme with subtle gray gridlines
+    and no x-axis grid. Overall, the plot is clean, well-proportioned, and effectively
+    communicates the ranking of categories by sales.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Category labels rotated 45° prevent overlap, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stems (size=1.5) and dots (size=8) are well-proportioned for 8 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, minor extra space at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Category" and "Sales ($)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but y-axis gridlines could be more subtle (currently
+          default)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular dots, vertical orientation, sorted by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-50,000 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exactly matches "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 8 categories with clear value differentiation; could show more
+          variation in value gaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values $9,800-$45,200 are realistic for category-level retail
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no random seed comment/documentation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_segment, geom_point, theme_minimal), but
+          could leverage more lets-plot specific features like tooltips or coordFlip
+          for horizontal variant
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/matplotlib.yaml b/plots/lollipop-basic/metadata/matplotlib.yaml
index b6eb1272ef..f75e97d2e6 100644
--- a/plots/lollipop-basic/metadata/matplotlib.yaml
+++ b/plots/lollipop-basic/metadata/matplotlib.yaml
@@ -21,3 +21,171 @@ review:
   weaknesses:
   - No random seed set (though data is deterministic)
   - Grid styling could include minor gridlines for enhanced readability
+  image_description: The plot displays a lollipop chart showing product sales by category.
+    Ten categories are shown on the x-axis (Electronics, Clothing, Home & Garden,
+    Sports, Books, Toys, Beauty, Automotive, Food & Beverages, Office Supplies), sorted
+    in descending order by value. Blue vertical stems (#306998) extend from y=0 to
+    each data point, topped with yellow circular markers (#FFD43B) featuring blue
+    borders. Y-axis shows "Sales (thousands)" ranging from 0 to approximately 95.
+    X-axis labels are rotated 45 degrees for readability. The title "lollipop-basic
+    · matplotlib · pyplots.ai" appears at the top. A subtle dashed horizontal grid
+    (alpha=0.3) aids value reading. The layout is clean with good use of canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; rotated x-labels prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (s=300) are appropriately sized for 10 data points; stems
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; slight extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Product Category" and "Sales (thousands)" are descriptive with
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and helpful; no legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular markers, vertical orientation, sorted by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0 to max*1.1, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lollipop-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good range of values with clear ranking; could benefit from
+          more dramatic spread
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 25-87 thousands are realistic; units clearly specified
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded), but sorting uses np which is reproducible
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses vlines() and scatter() effectively; standard matplotlib usage
+          without advanced features
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/plotly.yaml b/plots/lollipop-basic/metadata/plotly.yaml
index ad4fada49d..7a0b13bbbf 100644
--- a/plots/lollipop-basic/metadata/plotly.yaml
+++ b/plots/lollipop-basic/metadata/plotly.yaml
@@ -25,3 +25,180 @@ review:
     data would have varied precision
   - Could leverage more plotly-specific features like custom hover modes or improved
     interactivity
+  image_description: The plot displays a basic lollipop chart showing product sales
+    by category. Ten categories are displayed on the x-axis (Electronics, Clothing,
+    Home & Garden, Sports, Books, Toys, Beauty, Automotive, Food & Grocery, Health)
+    with sales values on the y-axis ranging from $0 to approximately $137,500. Each
+    data point consists of a thin blue vertical stem (#306998) extending from the
+    baseline to a circular blue marker with a white border. The title "lollipop-basic
+    · plotly · pyplots.ai" is centered at the top. Category labels are rotated -45
+    degrees. Y-axis shows dollar formatting with grid lines. Data is sorted descending
+    from Electronics ($125,000) to Health ($31,000). The plot uses plotly_white template
+    for a clean background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title, axis labels, tick labels all
+          appropriately sized for 4800x2700 px output'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels are well-spaced with -45 degree
+          rotation
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (size 18) and stems (width 3) are well-sized for the data
+          density of 10 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe, good contrast against
+          white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions but margins could be slightly optimized; plot fills
+          canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales ($)" and "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present and subtle, and no legend is needed (single series);
+          slight deduction for missing emphasis on the y=0 baseline
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thin stems, circular dots, vertical orientation,
+          sorted data'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0 to max*1.1, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, correctly omitted
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "lollipop-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good range of values across 10 categories; could demonstrate
+          more variation in value gaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a realistic, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sales values from $31K to $125K are realistic; slight deduction as
+          rounding is too clean (all multiples of 1000)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no random seed comment explaining
+          this choice
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of Graph Objects for precise control; hover template is
+          nice; could leverage more plotly-specific features like animations or better
+          interactivity configuration
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/plotnine.yaml b/plots/lollipop-basic/metadata/plotnine.yaml
index 72c44839b5..23f7680ed5 100644
--- a/plots/lollipop-basic/metadata/plotnine.yaml
+++ b/plots/lollipop-basic/metadata/plotnine.yaml
@@ -22,3 +22,172 @@ review:
   weaknesses:
   - Could use slightly larger markers (size=8 instead of 6) to make the lollipop dots
     more prominent
+  image_description: The plot displays a basic lollipop chart showing product sales
+    by category. Ten categories are shown on the x-axis (Automotive, Garden, Beauty,
+    Toys, Books, Sports, Groceries, Clothing, Furniture, Electronics), sorted in ascending
+    order by value. The y-axis shows "Sales (thousands $)" ranging from 0 to 250.
+    Each data point consists of a thin blue stem (#306998) extending from the baseline
+    (y=0) to a circular blue marker at the data value. The title "lollipop-basic ·
+    plotnine · pyplots.ai" is centered at the top. X-axis category labels are rotated
+    45 degrees for readability. Subtle dashed horizontal grid lines provide value
+    reference. The overall aesthetic is clean and minimalist with good use of whitespace.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, axis text at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels rotated 45° prevent overlap, no text collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stems (size=1.5) and markers (size=6) well-sized for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, minor extra whitespace
+          at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales (thousands $)", "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Horizontal grid alpha=0.3 is good, but no legend needed; minor:
+          vertical grid explicitly hidden is fine'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular markers, vertical orientation, sorted by value
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis starts at 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lollipop-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 10 categories with good value variation (58-245), demonstrates
+          ranking well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands are sensible; Electronics at 245k leading is
+          plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no random)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_segment + geom_point combination,
+          theme_minimal, pd.Categorical for ordering
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/pygal.yaml b/plots/lollipop-basic/metadata/pygal.yaml
index 8982919b39..29340ae6cb 100644
--- a/plots/lollipop-basic/metadata/pygal.yaml
+++ b/plots/lollipop-basic/metadata/pygal.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Missing y_title axis label (could add Product Category or similar)
   - Font sizes in Style could be better optimized for the large canvas
+  image_description: The plot displays a horizontal lollipop chart with 8 product
+    categories (Smartphones, Laptops, Tablets, Headphones, Smartwatches, Cameras,
+    Speakers, Gaming) on the y-axis and sales values ranging from 0 to ~900 units
+    on the x-axis. Each category has a thin horizontal blue line (stem) extending
+    from the baseline (x=0) to a circular blue dot at the data value. The data is
+    sorted in descending order by value (Smartphones highest at ~892, Gaming lowest
+    at ~134). The chart uses a consistent Python Blue (#306998) color for all elements.
+    The title "lollipop-basic · pygal · pyplots.ai" appears at the top. Subtle gray
+    grid lines are visible in the background, and the x-axis label reads "Sales (units)".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, category labels, and axis labels are
+          clear. Tick labels could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; horizontal orientation handles category labels
+          well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dots are clearly visible with appropriate size (dots_size=24), stems
+          are thin but visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight excess whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Sales (units)" with units; Y-axis categories are descriptive
+          but no y_title
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend disabled (correct for single
+          series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly on y-axis, values on x-axis (horizontal orientation
+          per spec note for long labels)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular dots, sorted by value, horizontal orientation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate xrange
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled for single-series data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lollipop-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows range of values with good distribution; could show more variation
+          in spacing
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product electronics sales is a realistic, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for sales units; range is sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded); no random seed is needed since
+          no randomness is used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart with custom node styling to hide baseline dots, custom
+          Style class, SVG rendering. Creative workaround for lollipop using XY coordinates.
+  verdict: APPROVED
diff --git a/plots/lollipop-basic/metadata/seaborn.yaml b/plots/lollipop-basic/metadata/seaborn.yaml
index dd26d8de65..3dcf9049f2 100644
--- a/plots/lollipop-basic/metadata/seaborn.yaml
+++ b/plots/lollipop-basic/metadata/seaborn.yaml
@@ -22,3 +22,173 @@ review:
   weaknesses:
   - Could add subtle value annotations at the markers to enhance readability
   - Relies heavily on matplotlib for stems rather than seaborn-specific features
+  image_description: The plot displays a horizontal lollipop chart showing product
+    sales by category. Blue horizontal stems (#306998) extend from the y-axis baseline
+    (0) to yellow circular markers (#FFD43B with blue edge) at each data value. The
+    chart shows 10 product categories sorted by sales value in ascending order from
+    bottom (Health at ~$18,000) to top (Electronics at ~$85,000). The title correctly
+    uses the format "lollipop-basic · seaborn · pyplots.ai". Axis labels are descriptive
+    ("Sales ($)" and "Product Category"), with subtle dashed grid lines on the x-axis.
+    Top and right spines are removed for a minimalist aesthetic.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 24pt, labels at 20pt, ticks
+          at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (s=400) are appropriately sized and visible; stems are well-proportioned
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue stems and yellow markers provide excellent contrast and are
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales ($)" and "Product Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present and subtle (alpha=0.3), and no legend is needed;
+          no deduction was intended for the grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct lollipop chart with stems and circular markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values on x-axis (horizontal orientation)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, circular markers, sorted by value, baseline at 0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible from 0 to max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed (single data series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "lollipop-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 10 categories with varying values demonstrating comparison;
+          could show more variation in step sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales by category is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in thousands of dollars are realistic for category-level
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random generation needed), but no explicit
+          seed for documentation consistency
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: matplotlib.pyplot, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot for markers but relies on matplotlib for stems
+          (hlines); seaborn doesn't have a native lollipop chart function, so this
+          hybrid approach is acceptable but not showcasing seaborn's unique strengths
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/altair.yaml b/plots/manhattan-gwas/metadata/altair.yaml
index f0fda3ed8e..67dffdb066 100644
--- a/plots/manhattan-gwas/metadata/altair.yaml
+++ b/plots/manhattan-gwas/metadata/altair.yaml
@@ -26,3 +26,178 @@ review:
   weaknesses:
   - Y-axis label uses subscript notation (-log₁₀) which displays well, but some tick
     labels on the right side of smaller chromosomes (19-22) appear slightly cramped
+  image_description: 'The plot displays a Manhattan plot for GWAS data with all 22
+    chromosomes arranged along the x-axis. Points are colored in alternating blue
+    (#306998) and gray (#7F7F7F) for adjacent chromosomes, creating clear visual separation.
+    The y-axis shows -log₁₀(p-value) ranging from 0 to ~11. Two horizontal dashed
+    threshold lines are visible: a red line at approximately 7.3 (genome-wide significance,
+    p < 5×10⁻⁸) and an orange line at 5.0 (suggestive threshold, p < 1×10⁻⁵). Significant
+    peaks are visible above the genome-wide threshold on chromosomes 2, 6, 8, and
+    15, with these points displayed larger than non-significant ones. Suggestive signals
+    appear on chromosomes 3, 11, and 19. Chromosome labels (1-22) are centered below
+    their respective regions. The title "manhattan-gwas · altair · pyplots.ai" appears
+    at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title clear at 28pt, axis labels at 22pt, tick labels at 16pt - all
+          readable, slight crowding in smaller chromosomes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, chromosome labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized with appropriate alpha (0.7), significant hits
+          larger (100px vs 30px), very slight overplotting in dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray alternating scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with vconcat for labels, slight excess whitespace at
+          right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label "-log₁₀(p-value)", X-axis labeled "Chromosome"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at 0.3 opacity, no legend needed (colors self-explanatory)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative genomic position on X, -log10(p-value) on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Alternating colors, genome-wide threshold (7.3), suggestive threshold
+          (5.0), chromosome labels centered
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y scale extends to max+1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, colors distinguish chromosomes
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"manhattan-gwas · altair · pyplots.ai" exact format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows significant peaks (chr 2,6,8,15), suggestive hits (chr 3,11,19),
+          and background noise - demonstrates full GWAS pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated GWAS data with realistic chromosome lengths and p-value
+          distributions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: ~60K SNPs across 22 chromosomes, p-values range appropriately, chromosome
+          sizes proportional to human genome
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data generation → chart building
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered charts, conditional encoding for point size,
+          tooltips for interactivity, vconcat for layout - solid Altair usage but
+          could leverage more declarative features
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/bokeh.yaml b/plots/manhattan-gwas/metadata/bokeh.yaml
index b67e939b02..13fd1c46fe 100644
--- a/plots/manhattan-gwas/metadata/bokeh.yaml
+++ b/plots/manhattan-gwas/metadata/bokeh.yaml
@@ -26,3 +26,177 @@ review:
   - Missing legend to explain color coding (blue/light blue for chromosomes, yellow
     for significant)
   - Could add HoverTool to show SNP details on mouseover, which is a key Bokeh strength
+  image_description: 'The plot displays a Manhattan plot for GWAS data with approximately
+    44,000 SNPs across 22 chromosomes. The x-axis shows genomic position with chromosome
+    numbers (1-22) labeled below. The y-axis shows -log₁₀(p-value) ranging from 0
+    to about 12. Points use alternating blue shades (darker #306998 and lighter #7BA3C9)
+    for adjacent chromosomes, creating visual separation. Significant SNPs above the
+    genome-wide threshold are highlighted in yellow/gold (#FFD43B) and are larger
+    in size. Two horizontal threshold lines are present: a red dashed line at ~7.3
+    for genome-wide significance (p = 5×10⁻⁸) and a teal/green dotted line at 5 for
+    suggestive threshold (p = 1×10⁻⁵). The title "manhattan-gwas · bokeh · pyplots.ai"
+    appears in the top-left. The background is light gray (#FAFAFA). Significant peaks
+    are visible on chromosomes 2, 6, 11, and 17 as specified in the data generation.
+    Bokeh interactive tools are visible in the top-right corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, but some text on the right side
+          is small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Point sizes are appropriate for data density; significant SNPs clearly
+          visible with larger markers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/light-blue alternating scheme is colorblind-safe, yellow highlights
+          are distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor issue with data concentrated in lower
+          portion
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Genomic Position" and "-log₁₀(p-value)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, but no legend explaining the color scheme for significant
+          vs non-significant SNPs
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative genomic position on X, -log10(p-value) on Y
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has significance thresholds, alternating colors, significant SNP
+          highlighting; missing optional SNP labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data including most significant SNPs
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend for color coding
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "manhattan-gwas · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows significant peaks on multiple chromosomes, realistic p-value
+          distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated GWAS data is a standard, neutral scientific context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic chromosome sizes, p-values range appropriately
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, interactive tools (pan, zoom), Span for threshold
+          lines, but could leverage more Bokeh features like HoverTool
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/highcharts.yaml b/plots/manhattan-gwas/metadata/highcharts.yaml
index cbdb9e3e5e..f6e68228a8 100644
--- a/plots/manhattan-gwas/metadata/highcharts.yaml
+++ b/plots/manhattan-gwas/metadata/highcharts.yaml
@@ -28,3 +28,180 @@ review:
     increased to 3-4px for better visibility'
   - Threshold line labels on the right side are somewhat small and could benefit from
     larger font or positioning closer to the line
+  image_description: 'The plot displays a Manhattan plot for GWAS data with approximately
+    14,000 simulated SNPs across 22 human chromosomes. The x-axis shows "Chromosome"
+    with numbers 1-22 labeled at chromosome midpoints. The y-axis shows "-log₁₀(p-value)"
+    ranging from 0 to 16. Data points alternate between blue (#306998) for odd chromosomes
+    and gray (#7a7a7a) for even chromosomes, creating clear visual separation. Two
+    horizontal threshold lines are visible: a red dashed line at y=7.3 labeled "Genome-wide
+    significance (p = 5×10⁻⁸)" and a blue dotted line at y=5 labeled "Suggestive (p
+    = 10⁻⁵)". Significant peaks are visible above the genome-wide threshold on chromosomes
+    2, 6, 11, and 17, with the highest reaching ~14.5. Suggestive signals appear on
+    chromosomes 4, 8, 15, and 20. The title reads "manhattan-gwas · highcharts · pyplots.ai"
+    with subtitle "Simulated GWAS Results Across Human Chromosomes". The plot uses
+    a white background with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at large
+          canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, chromosome labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points visible with alternating colors; marker size could be
+          slightly larger for dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray alternating palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; slight margin imbalance on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"-log₁₀(p-value)" includes proper subscript notation, "Chromosome"
+          is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but legend is disabled and threshold line
+          labels are small
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot scatter type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative genomic position on X, -log10(p-value) on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Alternating colors, genome-wide significance line at 7.3, suggestive
+          line at 5, chromosome labels centered
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-16 shows all data including highest peaks
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for this plot type (chromosomes distinguished
+          by color and label)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "manhattan-gwas · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows significant peaks, suggestive signals, and background noise;
+          could show more variation in peak heights
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GWAS simulation with realistic chromosome sizes and p-value distributions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: ~14k SNPs reasonable for visualization; real GWAS typically has more
+          but this is appropriate for demonstration
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (json, tempfile, time, urllib, numpy, selenium)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves both plot.png and plot.html (correct, but minor: animation=False
+          only on series, not chart level)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts plotLines for threshold annotations, custom x-axis
+          formatter, turboThreshold for large datasets; could use more interactive
+          features like hover highlighting
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/letsplot.yaml b/plots/manhattan-gwas/metadata/letsplot.yaml
index 9499faf82a..8618bcfce1 100644
--- a/plots/manhattan-gwas/metadata/letsplot.yaml
+++ b/plots/manhattan-gwas/metadata/letsplot.yaml
@@ -23,3 +23,174 @@ review:
   weaknesses:
   - Chromosome labels on x-axis could be slightly larger for better readability at
     full resolution
+  image_description: The Manhattan plot displays GWAS results across 22 chromosomes
+    arranged along the x-axis with cumulative genomic positions. Points are colored
+    in alternating blue (#306998) and light blue (#7A9BBD) for adjacent chromosomes.
+    The y-axis shows -log₁₀(p-value) ranging from 0 to 12. A horizontal dashed red
+    line indicates the genome-wide significance threshold at approximately 7.3 (-log₁₀(5×10⁻⁸)),
+    and a dotted gray line marks the suggestive threshold at 5. Significant SNPs above
+    the genome-wide threshold are highlighted with larger red points. Clear peaks
+    are visible on chromosomes 2, 6, 11, and 17, with additional suggestive signals
+    on chromosomes 1, 5, 8, 14, and 19. The title "manhattan-gwas · letsplot · pyplots.ai"
+    is displayed at the top in bold. The overall layout is well-balanced with good
+    use of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title large and bold, axis labels clear, chromosome labels readable
+          but slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized with appropriate alpha for dense data, significant
+          points clearly highlighted
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/light-blue alternating scheme is colorblind-safe, red highlights distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills appropriate area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has subscript notation (-log₁₀), but no units needed for this
+          type
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Minimal grid (major x removed), no legend needed (guide="none" appropriate)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative position on X, -log₁₀(p) on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features: alternating colors, threshold lines, significant SNP
+          highlighting'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All chromosomes 1-22 visible, y-axis shows full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden appropriately (colors self-explanatory)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "manhattan-gwas · letsplot · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows significant peaks, suggestive signals, and background noise
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated GWAS data with realistic patterns (44,000 SNPs across 22
+          chromosomes)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: P-values span realistic range, chromosome sizes vary appropriately
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but uses path="." parameter
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar, scale_x_continuous with custom breaks/labels,
+          theme customization, but could use more advanced lets-plot features
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/matplotlib.yaml b/plots/manhattan-gwas/metadata/matplotlib.yaml
index 6f865b880c..117cbe5714 100644
--- a/plots/manhattan-gwas/metadata/matplotlib.yaml
+++ b/plots/manhattan-gwas/metadata/matplotlib.yaml
@@ -25,3 +25,187 @@ review:
   weaknesses:
   - No grid lines present (minor - acceptable for Manhattan plots to reduce clutter)
   - Y-axis label could include clearer notation
+  image_description: The Manhattan plot displays GWAS results across 22 chromosomes
+    on the x-axis with -log₁₀(p-value) on the y-axis. Chromosomes are clearly labeled
+    1-22 along the bottom. Points alternate between dark blue (#306998) and light
+    blue (#6699CC) for adjacent chromosomes, creating clear visual distinction. A
+    red dashed horizontal line marks the genome-wide significance threshold at -log₁₀(5×10⁻⁸)
+    ≈ 7.3, and a yellow dashed line marks the suggestive threshold at -log₁₀(1×10⁻⁵)
+    = 5. Significant SNPs above the genome-wide threshold are highlighted in red with
+    larger markers and white edges. Several significant peaks are visible on chromosomes
+    2, 6, 11, and 16, with the highest peak around -log₁₀(p) ≈ 14.5 on chromosome
+    2. The title "manhattan-gwas · matplotlib · pyplots.ai" is displayed at the top.
+    A legend in the upper right explains the threshold lines. The plot has clean styling
+    with no top/right spines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 14-16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, chromosome labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers appropriately sized for ~110k points (s=15 for regular, s=50
+          for significant); very dense at bottom but that's expected for GWAS data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue alternating colors are colorblind-safe, red for significant
+          hits provides clear distinction
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has proper subscript notation (-log₁₀(p-value)), but no units
+          needed for this context; X-axis says "Chromosome" which is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid present (acceptable for Manhattan plots to reduce visual
+          clutter), legend is well-placed but partially redundant since threshold
+          lines are self-explanatory
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative genomic position on X, -log₁₀(p-value) on Y, correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: alternating chromosome colors, genome-wide
+          significance threshold line at 7.3, suggestive threshold at 5, significant
+          SNPs highlighted in different color'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding (1.1× on Y, 1.01× on
+          X)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes threshold lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "manhattan-gwas · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full range of GWAS data: non-significant baseline, suggestive
+          hits, and highly significant peaks across multiple chromosomes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated GWAS data with realistic chromosome sizes, appropriate
+          number of SNPs (~40 per Mb), and significant peaks in specific chromosomes
+          mimicking real genetic signals
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic p-value ranges, appropriate SNP density, chromosome sizes
+          based on actual human genome
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, pandas used - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses deprecated ".loc" assignment pattern with a potential chained-indexing
+          warning (should use .copy() or a different approach)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Good use of matplotlib features: rasterized=True for performance
+          with large datasets, spine removal, proper layering with zorder. Could use
+          more advanced features like PathCollection for better performance or custom
+          tick formatting.'
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/plotly.yaml b/plots/manhattan-gwas/metadata/plotly.yaml
index f939a6f0e7..f5c444b60e 100644
--- a/plots/manhattan-gwas/metadata/plotly.yaml
+++ b/plots/manhattan-gwas/metadata/plotly.yaml
@@ -25,3 +25,184 @@ review:
   - Could leverage more Plotly-specific interactive features like chromosome filtering
     dropdown or range slider for genomic position
   - Grid lines could be slightly more visible (currently very faint at alpha 0.1)
+  image_description: 'The plot displays a Manhattan plot for GWAS visualization with
+    -log₁₀(p-value) on the y-axis and chromosome positions (1-22) on the x-axis. Points
+    are colored in alternating shades of blue (darker blue #306998 for odd chromosomes,
+    lighter blue #7A9FBF for even chromosomes). Three clear peaks of significant SNPs
+    are visible: on chromosome 2 (reaching ~12), chromosome 8 (reaching ~9.5), and
+    chromosome 15 (reaching ~13). Significant SNPs above the genome-wide significance
+    threshold are highlighted as red diamond markers. Two horizontal threshold lines
+    are present: a red dashed line at ~7.3 for genome-wide significance (p = 5×10⁻⁸)
+    and a yellow/gold dotted line at 5 for the suggestive threshold (p = 10⁻⁵). The
+    title "manhattan-gwas · plotly · pyplots.ai" is centered at the top. A legend
+    in the upper left shows "Significant SNPs" with the red diamond marker. The overall
+    layout is clean with a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis titles at 24pt, tick fonts at 16-18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, chromosome labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized appropriately for dense GWAS data (size=5), good alpha=0.7,
+          significant SNPs highlighted distinctly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/light blue alternating colors are colorblind-safe, red for significant
+          points provides good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins, plot fills appropriate area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "-log₁₀(p-value)" with proper subscript notation, X-axis
+          has "Chromosome"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), but legend placement in upper left could
+          overlap with data peaks in some scenarios; threshold line annotations are
+          well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative genomic position on X, -log₁₀(p) on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: alternating chromosome colors, genome-wide
+          significance threshold, suggestive threshold, significant SNP highlighting,
+          chromosome labels centered'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range (0-13), X-axis shows all 22 chromosomes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Significant SNPs" with red diamond marker
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "manhattan-gwas · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple significant peaks across different chromosomes, demonstrates
+          the "Manhattan skyline" pattern well, includes SNPs at various significance
+          levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: GWAS data is a perfect neutral scientific context, simulated data
+          follows realistic patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: p-values and -log₁₀ transformations are realistic; chromosome lengths
+          are reasonable approximations
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → plot creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects used, all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Plotly's graph_objects effectively with custom hover templates,
+          but doesn't leverage Plotly's full interactive potential (e.g., clickable
+          points with callbacks, range sliders, dropdown filters for chromosomes).
+          The HTML output does provide basic interactivity (hover, zoom, pan).
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/plotnine.yaml b/plots/manhattan-gwas/metadata/plotnine.yaml
index 8b7adaf87e..8e1980c382 100644
--- a/plots/manhattan-gwas/metadata/plotnine.yaml
+++ b/plots/manhattan-gwas/metadata/plotnine.yaml
@@ -23,3 +23,176 @@ review:
   - X-axis chromosome labels 19-22 are slightly crowded; consider rotating or using
     abbreviated labels
   - Could highlight or label top significant SNPs as noted in spec
+  image_description: The Manhattan plot displays GWAS results across 22 chromosomes
+    on a 16:9 landscape canvas. Points are colored with alternating blue (#306998)
+    and gray (#636363) for adjacent chromosomes, creating clear visual distinction.
+    The x-axis shows "Chromosome" with labels 1-22, though some smaller chromosome
+    labels (19, 20, 21, 22) appear crowded. The y-axis displays "-log₁₀(p-value)"
+    ranging from 0 to ~10.5. A red dashed horizontal line marks the genome-wide significance
+    threshold at ~7.3, and an orange dotted line marks the suggestive threshold at
+    5.0. Significant peaks are visible above the threshold line on chromosomes 2,
+    6, 11, and 17, with suggestive signals on chromosomes 4, 9, 15, and 20. The title
+    reads "manhattan-gwas · plotnine · pyplots.ai" centered at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clear and well-sized; y-axis subscript
+          formatting (₁₀) is excellent
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor crowding of chromosome labels 19-22 on x-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Point size and alpha (1.2, 0.7) are well-adapted for ~8000 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray alternation is colorblind-safe, red/orange thresholds distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills majority of area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Chromosome" and "-log₁₀(p-value)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed (correct), but vertical grid lines are removed creating
+          slight visual discontinuity
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot for GWAS data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Cumulative position on X, -log10(p-value) on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Alternating colors, genome-wide threshold, suggestive threshold,
+          chromosome labels centered
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis scaled appropriately with 5% padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed/shown (correct for this plot type)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "manhattan-gwas · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows significant peaks (chr 2, 6, 11, 17), suggestive signals (chr
+          4, 9, 15, 20), and baseline noise; could have slightly more variation in
+          peak heights
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated GWAS data with realistic chromosome sizes and p-value distributions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Good p-value range; ~8000 SNPs is reasonable but smaller than typical
+          GWAS (noted in spec as 100K-1M)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Save uses explicit width/height which may override figure_size in
+          theme
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot2 grammar (aes, geom_point, geom_hline, scale_*,
+          theme_minimal, element_text/element_blank), but could leverage additional
+          plotnine features like annotate() for labeling top SNPs
+  verdict: APPROVED
diff --git a/plots/manhattan-gwas/metadata/seaborn.yaml b/plots/manhattan-gwas/metadata/seaborn.yaml
index be5d0d98ad..f86c3438ba 100644
--- a/plots/manhattan-gwas/metadata/seaborn.yaml
+++ b/plots/manhattan-gwas/metadata/seaborn.yaml
@@ -24,3 +24,181 @@ review:
   - Y-axis grid could be styled more subtly (current alpha=0.3 is slightly prominent)
   - Significant SNPs highlighting uses raw matplotlib instead of seaborn; could use
     a second scatterplot call for consistency
+  image_description: 'The Manhattan plot displays simulated GWAS data with ~10,000
+    SNPs across 22 chromosomes. The x-axis shows chromosome positions with labels
+    1-22 centered beneath each chromosome''s region. The y-axis shows -log₁₀(p-value)
+    ranging from 0 to ~12. Points are colored with alternating blue (#306998) and
+    light blue/gray (#8B9DC3) for adjacent chromosomes. Three distinct significant
+    peaks are visible: a major peak on chromosome 6 reaching -log₁₀(p) ≈ 12 (highlighted
+    in yellow), a moderate peak on chromosome 11 reaching ≈ 9, and a smaller peak
+    on chromosome 2 reaching ≈ 8. Yellow markers with black edges highlight SNPs above
+    the genome-wide significance threshold. Two horizontal threshold lines are shown:
+    a red dashed line at y=7.3 (genome-wide significance, p < 5×10⁻⁸) and a gray dotted
+    line at y=5 (suggestive threshold, p < 1×10⁻⁵). The legend is positioned in the
+    upper right. The plot has a clean appearance, with only y-axis gridlines shown and
+    the top/right spines removed.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 12-14pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, chromosome labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Good marker size (s=15) and alpha (0.7) for dense data; significant
+          hits highlighted well with larger yellow markers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/light blue alternating scheme is colorblind-safe; yellow highlights
+          provide excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned nicely
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has proper subscript notation (-log₁₀(p-value)), but x-axis
+          just says "Chromosome" without indicating it's genomic position
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend entries are clear and well-positioned; however, y-axis grid
+          at alpha=0.3 is slightly prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Manhattan plot for GWAS data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = cumulative genomic position, Y = -log10(p-value)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Alternating chromosome colors, significance threshold at 7.3, suggestive
+          threshold at 5, significant SNPs highlighted
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full genome displayed, y-axis shows all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels both threshold lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "manhattan-gwas · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows realistic distribution with multiple peaks of varying
+          significance (chr6 major, chr11 moderate, chr2 smaller)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Simulates real GWAS patterns: chr6 peak mimics MHC region association,
+          realistic p-value distribution using beta distribution'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Chromosome sizes are biologically accurate; however, ~10k SNPs is
+          on the lower end for GWAS (spec mentions 100k-1M typical)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API used correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn scatterplot with hue for alternating colors
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of seaborn's scatterplot with palette, but falls back to
+          matplotlib for significant point highlighting rather than using seaborn
+          throughout
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/altair.yaml b/plots/marimekko-basic/metadata/altair.yaml
index 16ed7e4729..0c32bec891 100644
--- a/plots/marimekko-basic/metadata/altair.yaml
+++ b/plots/marimekko-basic/metadata/altair.yaml
@@ -24,3 +24,175 @@ review:
   - Y-axis tick labels are somewhat dense (every 5 units) which could be simplified
   - Grid lines could be more subtle (currently at 0.3 opacity)
   - Could add value labels on larger segments as suggested in spec notes
+  image_description: 'The plot displays a Marimekko chart showing market share by
+    region and product line. Four variable-width bars represent regions: Asia Pacific
+    and North America (equal width, $300M each), Europe (slightly narrower, $240M),
+    and Latin America (narrowest, $130M). Each bar is subdivided into four colored
+    segments representing product lines: Electronics (dark blue #306998), Clothing
+    (golden yellow #FFD43B), Food (teal #4ECDC4), and Home (coral/salmon #E76F51).
+    The y-axis shows "Product Mix (%)" from 0-100, and region labels with market sizes
+    appear below each bar. A legend titled "Product Line" is positioned on the right
+    side. The title "marimekko-basic · altair · pyplots.ai" appears at the top center.
+    White stroke separates segments for clarity.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend are clearly readable; tick labels
+          slightly dense
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rect segments perfectly sized and visible with white stroke separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between all four colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight empty space on right side near legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Product Mix (%)" with unit, but no traditional x-axis
+          label
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed, grid visible but slightly prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko chart with variable-width bars and stacked segments
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Regions as x-categories (width = total), Products as y-categories
+          (height = proportion)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Variable widths, proportional heights, color coding, legend, region
+          labels with totals
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis 0-100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows `{spec-id} · {library} · pyplots.ai` format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying market sizes and different product mixes per region,
+          but product proportions could vary more dramatically
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Real-world scenario: market share analysis by region and product
+          line with plausible revenue figures'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values in millions are reasonable; slight uniformity in product
+          distribution
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but also "plot.html" which adds unnecessary files
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_rect, layered charts, tooltips; could leverage more Altair-specific
+          features like selections
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/bokeh.yaml b/plots/marimekko-basic/metadata/bokeh.yaml
index bc2d44d883..e3794eba2b 100644
--- a/plots/marimekko-basic/metadata/bokeh.yaml
+++ b/plots/marimekko-basic/metadata/bokeh.yaml
@@ -24,3 +24,172 @@ review:
   - Legend position overlaps slightly with the rightmost portion of the chart area
   - No y-axis scale to indicate percentage proportions (0-100%)
   - Hover tool is only useful in HTML output, not visible in PNG
+  image_description: 'The plot displays a Marimekko chart showing market share data
+    across 4 regions. Bar widths vary proportionally to market size: Asia Pacific
+    is widest, followed by North America, Europe, and Latin America,
+    which is narrowest. Each bar is divided into 4 stacked segments representing
+    product categories: Electronics (blue #306998), Apparel (yellow #FFD43B), Home
+    & Garden (teal #4ECDC4), and Food & Beverage (coral #E8685D). Value labels (in billions)
+    appear on larger segments in white bold text. Region names with total values are
+    displayed below each bar. A legend in the upper right shows product categories.
+    The title "marimekko-basic · bokeh · pyplots.ai" is centered at top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, labels at 22-24pt are readable; region labels slightly
+          small relative to canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments are well-sized, value labels only on sufficiently large
+          segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors with good contrast, colorblind-friendly palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; legend placement slightly overlaps with Asia
+          Pacific bar area
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Custom labels used instead of axes (acceptable for Marimekko), but
+          no y-axis percentage scale
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No distracting grid, legend is clear with color boxes and text
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko chart with variable-width bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-categories as bar widths, Y-categories as stacked segments
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Variable widths, stacked segments, value labels, legend all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, proportions accurate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match product categories exactly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "marimekko-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying bar widths and different proportions across regions;
+          could show more dramatic variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by region and product line is a real business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in billions are realistic; some segments could have more variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Deterministic data (no random), but no explicit seed statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool, LabelSet, but hover interactivity
+          only works in HTML
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/highcharts.yaml b/plots/marimekko-basic/metadata/highcharts.yaml
index bfe714cbd5..198650d9e8 100644
--- a/plots/marimekko-basic/metadata/highcharts.yaml
+++ b/plots/marimekko-basic/metadata/highcharts.yaml
@@ -22,3 +22,170 @@ review:
   weaknesses:
   - Grid lines not visible on the plot area (gridLineWidth set but not appearing prominently)
   - Y-axis tick labels could be slightly larger for optimal readability at 4800x2700
+  image_description: 'The plot displays a Marimekko chart showing market share distribution
+    across 5 regions (North America, Europe, Asia Pacific, Latin America, Middle East).
+    Each region has a variable-width bar proportional to its market size in millions
+    USD. The bars are stacked with 4 product categories: Enterprise (dark blue #306998),
+    SMB (yellow #FFD43B), Consumer (purple #9467BD), and Government (cyan #17BECF).
+    Percentage labels appear on each segment (e.g., 42%, 28%, 22%, 8% for North America).
+    The title "marimekko-basic · highcharts · pyplots.ai" appears at top with a subtitle.
+    The legend is positioned on the right side. Region names appear below each bar,
+    and the x-axis shows cumulative market size values.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and data labels are readable; y-axis tick labels are
+          slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All segments clearly visible with good sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, purple, cyan)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; slight extra whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Market Share (%)", X-axis has "Market Size (Millions
+          USD)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines on plot area; legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko chart using variwide with stacking
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Bar widths = market size, heights = market share percentage
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Variable widths, stacked segments, color coding, value labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis 0-100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "marimekko-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying bar widths, different distributions per region
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by region and product line is a perfect real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Market sizes (120-520M USD) and percentages (8-45%) are realistic
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data); hardcoded data is
+          fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses variwide chart type with stacking, custom renderer for region
+          labels, but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/letsplot.yaml b/plots/marimekko-basic/metadata/letsplot.yaml
index abbeae6bc7..0f67a2e699 100644
--- a/plots/marimekko-basic/metadata/letsplot.yaml
+++ b/plots/marimekko-basic/metadata/letsplot.yaml
@@ -24,3 +24,173 @@ review:
     given sufficient space
   - Grid lines are removed entirely - subtle grid lines could help readers trace percentage
     values
+  image_description: The plot displays a Marimekko chart showing market share data
+    across four regions (North America, Europe, Asia Pacific, Latin America) with
+    four product lines (Electronics in blue, Apparel in yellow, Home Goods in green,
+    Food & Beverage in red). Each region is represented as a variable-width column
+    where the width is proportional to total market size - Asia Pacific is the widest
+    (~36%), followed by Europe and North America (~27% each), with Latin America being
+    the narrowest (~11%). Within each column, stacked segments show the proportion
+    of each product line, with dollar values labeled in white bold text (e.g., "$120.0M",
+    "$180.0M"). The y-axis shows "Share within Region (%)" from 0-100, and the x-axis
+    shows "Market Size Distribution" with region names slightly angled. A legend on
+    the right identifies the four product lines. The title follows the required format.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels and values clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments perfectly sized and visible, value labels clear on all segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, red palette is colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend positioned appropriately on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has units (%), X-axis lacks units but descriptive (-1)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid completely removed; subtle grid would help trace percentages
+          (-2)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko/mekko chart with variable-width bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Regions as x-categories (bar widths), products as y-categories (stacking)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Variable widths, stacking, value labels, legend all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, 0-100% range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"marimekko-basic · letsplot · pyplots.ai" matches required format'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varying bar widths (market sizes) and different product mixes
+          per region
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by region and product line is a real business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in millions ($25M-$180M) are realistic market sizes
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic (hardcoded values) - acceptable
+          but not documented (-2)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses geom_rect and geom_text effectively, but this is standard ggplot2-style
+          usage rather than lets-plot specific features
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/matplotlib.yaml b/plots/marimekko-basic/metadata/matplotlib.yaml
index 0a573aa273..09a04c7f2c 100644
--- a/plots/marimekko-basic/metadata/matplotlib.yaml
+++ b/plots/marimekko-basic/metadata/matplotlib.yaml
@@ -25,3 +25,179 @@ review:
     be positioned inside the plot area
   - The y-axis label is clear but the plot would benefit from a subtitle explaining
     the width interpretation
+  image_description: 'The plot displays a Marimekko chart with 4 variable-width bars
+    representing global regions (North America, Europe, Asia Pacific, Latin America).
+    Bar widths are proportional to each region''s total market size—Asia Pacific is
+    widest ($500M) while Latin America is narrowest ($120M). Each bar is vertically
+    divided into 4 stacked segments representing product lines: Electronics (Python
+    blue #306998), Apparel (Python yellow #FFD43B), Home & Garden (green #4DAF4A),
+    and Sports (purple #984EA3). The y-axis shows percentage share (0%-100%) labeled
+    "Share within Region". Value labels (e.g., "$120M", "$200M") appear on larger
+    segments with white text on blue backgrounds and black text on lighter colors.
+    Region names with total market sizes appear below each bar. A legend titled "Product
+    Lines" sits outside the plot to the upper right. The title correctly follows the
+    format: "marimekko-basic · matplotlib · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments clearly visible with good contrast and white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, green, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but legend placement creates some empty space in
+          upper right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis label is descriptive but lacks unit context (already shows
+          %)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle y-axis grid (alpha 0.3, dashed), legend well-placed with title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko chart with variable-width bars and stacked segments
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-categories (regions) correctly determine bar widths, Y-categories
+          (products) correctly stacked
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: proportional widths, stacked heights,
+          value labels, legend, category names'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis spans 0-100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly match product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "marimekko-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different bar widths and varying proportions across regions,
+          but distributions are somewhat similar
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by region and product line is a real, comprehensible
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible ($15M to $200M range), though some proportions
+          could show more variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses deterministic data (no random), but no seed needed since data
+          is explicit
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300 and bbox_inches='tight'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ax.bar with bottom parameter for stacking, ax.text for
+          annotations, but no advanced matplotlib features like custom patches or
+          transforms
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/plotly.yaml b/plots/marimekko-basic/metadata/plotly.yaml
index 7d7924f036..09d345aa1e 100644
--- a/plots/marimekko-basic/metadata/plotly.yaml
+++ b/plots/marimekko-basic/metadata/plotly.yaml
@@ -25,3 +25,182 @@ review:
     values
   - Color palette could be more colorblind-friendly; blue and teal may be hard to
     distinguish for some viewers
+  image_description: 'The plot displays a Marimekko chart showing market share by
+    region and product line. Four regions are shown on the x-axis: North America,
+    Europe, Asia Pacific, and Latin America, with bar widths proportional to market
+    size (Asia Pacific is widest, Latin America is narrowest). Each bar is divided
+    into three stacked segments representing product lines: Enterprise (dark blue
+    #306998), SMB (yellow #FFD43B), and Consumer (teal #4ECDC4). Value labels in white
+    text show dollar amounts (e.g., $120M, $180M) inside each segment. The y-axis
+    shows percentage from 0% to 100%. A horizontal legend at the top identifies the
+    three product lines. The title reads "Market Share by Region · marimekko-basic
+    · plotly · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, value labels
+          at 18pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; value labels fit within segments, axis labels
+          well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are clearly visible with white borders separating them
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but the blue-teal combination could be
+          slightly challenging for some colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space, plot fills ~70% of area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Region (width = market size)"
+          and "Product Mix (share within region)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well positioned but grid lines are extremely subtle (alpha
+          0.1), making percentage benchmarks hard to reference
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko/mekko chart with variable-width stacked bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-categories (regions) determine bar width, Y-categories (products)
+          are stacked, values determine proportions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: proportional widths, stacked heights,
+          value labels on segments'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 0-100%, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Market Share by Region · marimekko-basic · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in both width and segment proportions; Asia Pacific
+          has largest market and different product mix than others
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by region and product line is a classic real-world use
+          case for Marimekko charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in millions USD are realistic; however, some regions have
+          very similar totals (NA=270, EU=250) which doesn't maximize width variation
+          demonstration
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → calculations → figure
+          → layout → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic hardcoded data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Bar with customdata for hover templates, but could leverage
+          more Plotly interactivity features
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/plotnine.yaml b/plots/marimekko-basic/metadata/plotnine.yaml
index 10a81ba048..d9c93ad73e 100644
--- a/plots/marimekko-basic/metadata/plotnine.yaml
+++ b/plots/marimekko-basic/metadata/plotnine.yaml
@@ -23,3 +23,177 @@ review:
   - Grid lines on y-axis could use alpha transparency for subtler appearance
   - Legend is sorted alphabetically rather than matching the stacking order in the
     bars
+  image_description: The plot displays a Marimekko chart showing market share by region
+    and product line. Four regions (Asia Pacific, Europe, Latin America, North America)
+    are shown as variable-width stacked bars where width represents total market size.
+    Each bar is divided into four product categories (Electronics in dark blue, Hardware
+    in coral/red, Services in teal, Software in yellow). The bars have white borders
+    between segments. Value labels (e.g., "$200M", "$140M") appear on larger segments.
+    The y-axis shows percentage (0%-100%), x-axis shows region names centered under
+    each bar. A legend on the right identifies the four product lines. The title follows
+    the required format with spec-id, library, and pyplots.ai branding.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis titles 20pt, tick labels 16pt, legend text
+          14-18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all region labels clearly separated, value labels
+          well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles clearly visible with white borders, appropriate sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color contrast, distinct hues; blue/teal could be slightly more
+          distinct for some colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with legend taking space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Market Segment (width = total market size)"
+          and "Product Share (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Horizontal grid lines present but vertical grid appropriately hidden;
+          however y-axis grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko/mosaic chart with variable-width bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-category (regions) determines width, Y-category (products) for
+          stacking
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Variable widths, stacking, color coding, value labels on larger segments,
+          legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, 0-100% y-axis, full x-axis coverage
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four product lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Market Share by Region · marimekko-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying bar widths (Latin America smallest), different product
+          mixes per region; could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Real business scenario: market share by region and product line
+          with plausible data'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $M range appropriate for market data, percentages 0-100%
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but no explicit seed; data values are hardcoded
+          so reproducible
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_rect, geom_text, scale_*, theme), but geom_rect
+          is basic; could have used coord_fixed or more advanced grammar features
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/pygal.yaml b/plots/marimekko-basic/metadata/pygal.yaml
index 81f433b764..f9c2a706cf 100644
--- a/plots/marimekko-basic/metadata/pygal.yaml
+++ b/plots/marimekko-basic/metadata/pygal.yaml
@@ -28,3 +28,190 @@ review:
     Marimekko support
   - Legend placement at bottom creates some visual distance from the chart
   - Smallest segments (MEA region) are quite narrow, making labels cramped
+  image_description: 'The plot displays a Marimekko chart showing market share data
+    across 5 regions (North America, Europe, Asia Pacific, Latin America, MEA) with
+    4 product categories (Enterprise, Consumer, SMB, Government). Bar widths vary
+    proportionally to represent regional market sizes - Asia Pacific is widest (32%),
+    followed by North America (27%), Europe (23%), Latin America (11%), and MEA (7%).
+    Each bar is stacked with 4 segments in distinct colors: blue (Enterprise), yellow
+    (Consumer), teal (SMB), and coral/pink (Government). Percentage labels appear
+    within larger segments. The Y-axis shows "Share within Region (%)" from 0% to
+    100% with dashed gridlines at 25% intervals. Category labels with width percentages
+    appear below each bar. A legend at the bottom identifies the 4 product lines.
+    The title "marimekko-basic · pygal · pyplots.ai" appears at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable; title, axis labels, and percentage
+          labels are well-sized. Minor: some percentage labels in smaller segments
+          are slightly cramped.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-spaced and the legend items
+          don't collide.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Segments are clearly visible with good sizing; white borders between
+          segments provide excellent separation. Minor: smallest segments (MEA columns)
+          are narrow but still visible.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, teal, and coral provide excellent contrast and are
+          colorblind-friendly (no red-green reliance).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space; plot fills most of the area. Minor: slightly
+          more bottom margin than needed.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label "Share within Region (%)"; X-axis shows
+          region names with market share percentages.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Dashed gridlines are subtle (good). Legend placement at bottom is
+          functional but slightly far from the chart.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko/mekko chart with variable-width bars and stacked
+          segments.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-categories (regions) determine bar widths; Y-categories (products)
+          are stacked within bars.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Variable bar widths, stacked segments, color-coded categories, percentage
+          labels on larger segments, legend present.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; bars sum to 100% height, widths sum to 100%.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 product categories with matching
+          colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "marimekko-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variation in both dimensions: different region sizes and different
+          product mixes per region. Minor: could show more extreme variation between
+          regions.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by region and product line is a perfect real-world use
+          case for Marimekko charts.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Revenue values in millions are realistic for enterprise software.
+          Minor: Latin America and MEA values seem proportionally small but plausible.'
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses a custom class (Marimekko) which violates KISS principle. However,
+          this is NECESSARY because pygal has no native Marimekko chart type - the
+          custom class is the only way to implement this spec.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.svg, plot.html).
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of pygal's SVG capabilities by extending the Graph
+          base class. Custom implementation leverages pygal's SVG node creation, styling
+          system, and rendering pipeline. The solution demonstrates deep understanding
+          of pygal's architecture.
+  verdict: APPROVED
diff --git a/plots/marimekko-basic/metadata/seaborn.yaml b/plots/marimekko-basic/metadata/seaborn.yaml
index 8eb1f6afd9..0327055e40 100644
--- a/plots/marimekko-basic/metadata/seaborn.yaml
+++ b/plots/marimekko-basic/metadata/seaborn.yaml
@@ -27,3 +27,178 @@ review:
   - While seaborn styling is used appropriately (set_style, set_context, despine),
     the core chart uses matplotlib patches since seaborn lacks native marimekko support
     - this is acceptable given the library limitation
+  image_description: 'The plot displays a Marimekko chart showing market share data
+    across 5 geographic regions (North America, Europe, Asia Pacific, Latin America,
+    Middle East). Bar widths are proportional to each region''s total revenue, with
+    Asia Pacific having the widest bar and Middle East the narrowest. Each bar is
+    stacked with 4 product categories using seaborn''s colorblind palette: Electronics
+    (blue), Apparel (gold/orange), Food & Beverage (teal/green), and Home Goods (rust
+    brown). The y-axis shows percentage from 0% to 100%. White bold value labels (e.g.,
+    "$45B", "$65B") appear on larger segments. The title "marimekko-basic · seaborn
+    · pyplots.ai" is centered at the top. A legend titled "Product Line" is positioned
+    outside the chart on the right side.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 24pt bold, axis labels at
+          20pt, tick labels at 16pt, value labels at 14pt bold white'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; region labels well-spaced, value labels centered
+          within segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles sized appropriately with clear white edges separating
+          segments
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind palette; all 4 colors clearly distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though legend placement creates some right-side
+          whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Region (width proportional to total revenue)"
+          and "Product Mix (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3 is subtle, but legend is placed outside plot area
+          requiring extra space
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Marimekko/mosaic chart with variable-width bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Regions as x-categories (widths), products as y-categories (heights)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features: variable width bars, stacked segments, value
+          labels, legend, proportional areas'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis 0-100%, x-axis spans full width
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 product categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "marimekko-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying region sizes and different product mixes; good variation
+          but could show more extreme differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: market share by region and product
+          line with plausible revenue figures'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Values in billions are realistic, but some proportions are quite
+          similar across regions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, matplotlib patches, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style, sns.set_context, sns.color_palette, and sns.despine,
+          but no seaborn plot functions (marimekko requires matplotlib patches)
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/altair.yaml b/plots/network-basic/metadata/altair.yaml
index 4ad13be591..f68d747e8c 100644
--- a/plots/network-basic/metadata/altair.yaml
+++ b/plots/network-basic/metadata/altair.yaml
@@ -25,3 +25,174 @@ review:
   - Some label overlap in densely connected areas (Noah/Mia/Olivia region)
   - Could leverage Altair interactive selection features for highlighting connected
     nodes
+  image_description: 'The plot shows a social network graph with 20 nodes (people)
+    connected by gray edges. Nodes are colored by community: blue (Group A - Alice,
+    Bob, Carol, David, Eve), yellow (Group B - Frank, Grace, Henry, Ivy, Jack), green
+    (Group C - Kate, Leo, Mia, Noah, Olivia), and coral/orange (Group D - Paul, Quinn,
+    Ryan, Sara, Tom). The title reads "Social Network · network-basic · altair · pyplots.ai"
+    at the top. A legend labeled "Communities" appears in the upper right showing
+    the four group colors. Each node displays the person''s name as a label. The network
+    shows clear community structure with clusters positioned via force-directed layout,
+    and cross-group connections (bridges) visible between communities. Node sizes
+    vary based on degree (number of connections).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is clear at 28pt, labels readable but some are slightly small
+          at 12pt
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor overlap on a few closely positioned labels (e.g., Noah/Mia
+          area)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes well-sized with degree-based sizing, edges clearly visible
+          with appropriate opacity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, coral palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, network well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for network graphs (no axes displayed, which is correct)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, no grid needed for network
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct node-link network diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly represented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Force-directed layout, node sizing by degree, community coloring,
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes and all edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 4 communities
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format with spec-id, library, pyplots.ai
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community structure, bridges, varying degrees; could show more
+          hub nodes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible social network scenario with named individuals
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 20 nodes is good, edge density appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → layout → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (minor, but spec only requires
+          png)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding, layering, and tooltips; could
+          leverage more interactivity
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/highcharts.yaml b/plots/network-basic/metadata/highcharts.yaml
index 55cff23ae5..521fcbe162 100644
--- a/plots/network-basic/metadata/highcharts.yaml
+++ b/plots/network-basic/metadata/highcharts.yaml
@@ -23,3 +23,179 @@ review:
   - Some node labels appear outside nodes (Alice, David, Grace, Frank, Olivia, Noah,
     Sara, Paul) rather than centered inside, creating visual inconsistency
   - Network could be positioned more centrally with legend closer to the visualization
+  image_description: 'The plot displays a network graph visualization of a social
+    network with 20 people organized into 4 communities. Nodes are colored by community:
+    blue (Community A - Alice, Bob, Carol, David, Eve), yellow/gold (Community B -
+    Frank, Grace, Henry, Ivy, Jack), teal/cyan (Community C - Kate, Leo, Mia, Noah,
+    Olivia), and purple (Community D - Paul, Quinn, Ryan, Sara, Tom). Gray edges connect
+    nodes showing friendship relationships both within and between communities. Node
+    sizes vary based on degree (connection count), with more connected nodes appearing
+    larger. The title "network-basic · highcharts · pyplots.ai" appears at the top.
+    A legend on the right identifies the four communities. Some node labels appear
+    inside nodes while others (Alice, David, Grace, Frank, Olivia, Noah, Sara, Paul)
+    appear outside near their nodes.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title is clear at 48px. Node labels inside nodes are readable but
+          some external labels (Alice, David, Grace, etc.) are smaller and less prominent
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor visual congestion in some areas where nodes cluster, but labels
+          are generally readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are well-sized with degree-based scaling. Edges are visible.
+          Some nodes overlap slightly due to force-directed simulation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Colorblind-safe palette: blue, yellow, teal, purple - no red-green
+          conflicts'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Network centered in canvas with reasonable use of space, though legend
+          is far from the network
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs (axes hidden appropriately)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend clearly shows all 4 communities with matching colors
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct network graph (node-link diagram)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly represented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Force-directed layout, node sizing by degree, community coloring,
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes and 31 edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies 4 communities
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: network-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community structure, cross-community bridges, varying node
+          degrees. Could show more variation in edge density
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with named people and friendship connections is realistic
+          and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 20 nodes is appropriate for static visualization per spec (10-50
+          range)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure but uses helper calculations and complex
+          JS injection
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed initial positions with deterministic circular layout
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses older Highcharts patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: NetworkGraphSeries, force-directed simulation, custom node markers,
+          Selenium rendering
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/letsplot.yaml b/plots/network-basic/metadata/letsplot.yaml
index 5cc6c0d90c..09594581e8 100644
--- a/plots/network-basic/metadata/letsplot.yaml
+++ b/plots/network-basic/metadata/letsplot.yaml
@@ -28,3 +28,173 @@ review:
     shown in the plot
   - Some label pairs are slightly close together (Leo/Kate, Noah/Mia areas) though
     not overlapping
+  image_description: 'The plot displays a network graph visualization titled "Office
+    Social Network · network-basic · letsplot · pyplots.ai". It shows 20 nodes representing
+    people across 4 departments: Research (Python blue #306998), Marketing (yellow
+    #FFD43B), Engineering (green #2CA02C), and Design (orange #E64A19). Each node
+    is labeled with a person''s name (Alice, Bob, Carol, etc.) with labels positioned
+    above the nodes. Gray edges (alpha ~0.4) connect nodes to show friendship relationships.
+    The layout demonstrates clear group clustering with Research at top-left, Marketing
+    at top-right, Engineering at bottom-left, and Design at bottom-right. Cross-group
+    connections (bridges) are visible between communities. Node sizes vary based on
+    degree/connections. The legend is positioned on the right side showing department
+    colors. The canvas is well-utilized with the network centered and properly scaled.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt bold, node labels at 9pt bold in dark color, legend
+          text at 14-16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Labels positioned above nodes with offset, minimal overlap; some
+          labels slightly close (Leo/Kate area)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes sized appropriately (8-16px based on degree), good alpha on
+          nodes (0.95), edges visible at alpha 0.4
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, orange) are colorblind-safe
+          and highly distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization (~50-60%), network centered but slightly
+          right-shifted due to legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs, axes hidden appropriately - full points
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid hidden (correct for network), but legend shows point markers
+          that don't match node sizes
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct network/node-link diagram with nodes and edges
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes positioned correctly, edges connect proper source/target
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Force-directed layout, node sizing by degree, group coloring, labels
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes visible, all edges shown within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 departments
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Office Social Network · network-basic · letsplot · pyplots.ai"
+          follows correct format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 communities, cross-group bridges, varying node degrees; could
+          show more prominent central hub nodes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Office social network with named employees in realistic departments
+          (Research, Marketing, Engineering, Design)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 20 nodes, 31 edges is good; node positions normalized well to [0.1,
+          0.9]
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → layout → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but code has nested for-loops (minor complexity)
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/matplotlib.yaml b/plots/network-basic/metadata/matplotlib.yaml
index e47e507ea4..67c2e7c490 100644
--- a/plots/network-basic/metadata/matplotlib.yaml
+++ b/plots/network-basic/metadata/matplotlib.yaml
@@ -22,3 +22,177 @@ review:
   weaknesses:
   - Legend labels are generic (Group A/B/C/D) rather than thematic
   - Edge rendering could benefit from curved edges or arrows to show connection direction
+  image_description: 'The plot displays a network graph (node-link diagram) representing
+    a social network with 20 people arranged in 4 communities. Nodes are circular
+    with bold black labels inside (names like Alice, Bob, Carol, etc.). The four groups
+    are color-coded: Group A (blue, #306998), Group B (yellow, #FFD43B), Group C (green,
+    #4CAF50), and Group D (orange/coral, #FF7043). Edges are semi-transparent gray
+    lines connecting nodes. The force-directed layout successfully clusters groups
+    together while showing cross-community bridge connections. Node sizes vary slightly
+    based on degree (number of connections). A legend titled "Communities" in the
+    upper-left shows the four group colors. The title reads "Social Network · network-basic
+    · matplotlib · pyplots.ai". The layout is clean with no axis elements visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt, labels are 11pt bold and clearly readable inside nodes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all node labels are fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Nodes are well-sized and visible, though some edges are quite close
+          to each other in dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that work for colorblind users (blue, yellow,
+          green, orange - no red-green only distinction)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, network fills canvas well without being cramped
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs, axes are correctly turned off
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is present and well-placed, but "Group A/B/C/D" labels are
+          generic rather than meaningful
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct network/node-link diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly represented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Force-directed layout, node size encoding degree, visible clusters
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes and all edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Social Network · network-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clusters, bridges between communities, varying node degrees,
+          but cross-group connections could be more prominent
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with named individuals is a realistic and comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 20 nodes with 31 edges is appropriate for static visualization per
+          spec
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → layout algorithm → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy, plus LineCollection which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Code saves as 'plot.png' but doesn't include full path
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses LineCollection for efficient edge rendering and custom force-directed
+          layout, but doesn't leverage matplotlib's more advanced capabilities like
+          FancyArrowPatch or annotations
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/plotly.yaml b/plots/network-basic/metadata/plotly.yaml
index b95ac9adde..118a6e2a31 100644
--- a/plots/network-basic/metadata/plotly.yaml
+++ b/plots/network-basic/metadata/plotly.yaml
@@ -25,3 +25,169 @@ review:
   - Network is positioned diagonally leaving some canvas corners empty
   - All community groups have exactly 5 members and 6 internal edges - more variation
     would better demonstrate the network concept
+  image_description: 'The plot displays a network graph with 20 nodes representing
+    people in a social network. Nodes are colored by community: blue (Group A - Alice,
+    Bob, Carol, David, Eve), yellow (Group B - Frank, Grace, Henry, Ivy, Jack), green
+    (Group C - Kate, Leo, Mia, Noah, Olivia), and orange/coral (Group D - Paul, Quinn,
+    Ryan, Sara, Tom). Each node is a circular marker with the person''s name displayed
+    inside in dark text. Gray edges connect related nodes both within and across communities.
+    The title "Social Network · network-basic · plotly · pyplots.ai" appears at the
+    top. A legend labeled "Communities" in the top-left corner shows all four groups.
+    The force-directed layout has positioned the network diagonally across the canvas,
+    with communities clustering together while cross-group connections create bridges
+    between them.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All node labels are readable, title is clear. Some labels are slightly
+          small on larger nodes.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned inside nodes
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Node sizes are well-adapted, with degree-based sizing making connected
+          nodes larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct, colorblind-safe colors (blue, yellow, green, coral)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Network is positioned diagonally; some unused space in corners
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed, clean styling with no grid (appropriate for
+          network)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct node-link network diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Force-directed layout, node sizing by degree, group coloring all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes and all edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 4 community groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Social Network · network-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows communities, bridges between groups, varying degrees. Could
+          show more variation in community sizes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with named individuals is a realistic, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 20 nodes is appropriate; edge density is reasonable but uniform within
+          groups
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → layout algorithm → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter for nodes/edges but doesn't leverage Plotly's interactive
+          hover features meaningfully or animations
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/pygal.yaml b/plots/network-basic/metadata/pygal.yaml
index b2b8d92d22..71ad9b9033 100644
--- a/plots/network-basic/metadata/pygal.yaml
+++ b/plots/network-basic/metadata/pygal.yaml
@@ -25,3 +25,174 @@ review:
     Network · network-basic · pygal · pyplots.ai"'
   - Network layout has diagonal bias leaving upper-right and lower-left corners mostly
     empty
+  image_description: 'The plot displays a network graph visualization with 20 circular
+    nodes arranged in a diagonal pattern from upper-left to lower-right. Four distinct
+    community groups are visible, each colored differently: blue (Group A) in the
+    upper-left, yellow (Group B) in the center-left, green (Group C) in the center,
+    and orange (Group D) in the lower-right. Gray lines represent edges connecting
+    nodes both within and between communities. The title "Social Network · network-basic
+    · pygal · pyplots.ai" appears at the top. A legend at the bottom shows "Connections",
+    "Group A", "Group B", "Group C", "Group D". The node sizes are uniform, and the
+    force-directed layout successfully separates the communities while showing cross-group
+    bridges.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend text are clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are visible with good dot size, edges visible but thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, orange) are colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Network fills canvas reasonably but has diagonal bias leaving corners
+          empty
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for network graphs, no axis labels needed
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements network graph (node-link diagram)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has force-directed layout, community colors, edges; missing variable
+          node sizing by degree (spec says "optional")
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes and connections visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies communities
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title has extra "Social Network ·" prefix; should be just "network-basic
+          · pygal · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows communities, bridges between groups, internal connections;
+          uniform node size limits degree visibility
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with 20 people in 4 communities is realistic and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 20 nodes with clear connections, appropriate for static visualization
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → layout calculation → chart creation
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png, plot.svg, and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart creatively for network visualization, custom
+          Style, SVG interactivity with tooltips showing degree info; however pygal
+          is not designed for network graphs so the approach is a workaround
+  verdict: APPROVED
diff --git a/plots/network-basic/metadata/seaborn.yaml b/plots/network-basic/metadata/seaborn.yaml
index 45d941c227..356a429975 100644
--- a/plots/network-basic/metadata/seaborn.yaml
+++ b/plots/network-basic/metadata/seaborn.yaml
@@ -24,3 +24,170 @@ review:
   - Node labels in dense areas (Leo/Jack/Mia region) have minor crowding
   - The force-directed algorithm implementation is quite verbose for a KISS-style
     script
+  image_description: 'The plot displays a social network graph with 20 nodes representing
+    people, connected by gray edges. Nodes are colored by team membership: Team A
+    (blue), Team B (yellow/gold), Team C (green), and Team D (coral/orange). Each
+    node displays the person''s name as a label centered on the node. Node sizes vary
+    based on degree (number of connections). The network shows clear community clustering
+    with the four teams grouped together, connected by bridge edges. The title "Social
+    Network · network-basic · seaborn · pyplots.ai" appears at the top. A legend in
+    the upper left shows the four team colors. The background is white with no axis
+    elements visible (axes are turned off appropriately for a network graph).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and readable at 24pt, node labels are 11pt bold which
+          is readable but slightly small for the canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Most labels are clear, minor crowding in the center-bottom area (Leo/Jack/Mia
+          region)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Node sizes are well-adapted with degree-based sizing, edges are visible
+          with appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, orange palette is colorblind-friendly with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Network fills the canvas well, legend positioned appropriately
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is clean with "Community" title, no grid (appropriate for
+          network)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct node-link network diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Force-directed layout, node size encoding degree, group coloring,
+          labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 20 nodes and 31 edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows four teams
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Social Network · network-basic · seaborn ·
+          pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clusters, bridge connections, varying node degrees
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with friendship connections is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 20 nodes is appropriate for static visualization per spec
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code has no functions/classes but the force-directed algorithm is
+          complex inline code
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' which is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn scatterplot with hue for group coloring and size for degree
+          encoding
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of seaborn's hue/size features, but network graphs are not
+          seaborn's strength
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/altair.yaml b/plots/network-directed/metadata/altair.yaml
index 9131c09df2..1457a0f5f9 100644
--- a/plots/network-directed/metadata/altair.yaml
+++ b/plots/network-directed/metadata/altair.yaml
@@ -29,3 +29,176 @@ review:
     congestion
   - Does not utilize Altair built-in interactivity features like selection highlighting
     or zoom/pan that would help explore the network
+  image_description: 'The plot displays a directed network graph visualizing software
+    package dependencies. The layout is hierarchical, flowing left-to-right with "App"
+    on the far left as the root node, progressing through middleware/router/api layers
+    to utility nodes like "Logger" and "Config" on the right. There are 12 circular
+    nodes color-coded by module type: blue (main: App), yellow (core: Router, API,
+    Auth, Database, Middleware), cyan (service: Cache), gray (util: Utils, Logger,
+    Config), and red (data: Models, Schemas). Directed edges are drawn as gray lines
+    with triangular arrow heads indicating dependency direction. The title reads "network-directed
+    · altair · pyplots.ai" with a subtitle explaining "Software Package Dependencies
+    (curved edges show bidirectional dependencies)". A legend labeled "Module Type"
+    appears in the upper right showing the 5 category colors. Node labels appear above
+    each circle in bold black text.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title (28pt), subtitle (18pt), and node labels (18pt) are clearly
+          readable. Legend text is appropriately sized.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels positioned above nodes with good spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are appropriately sized (800), arrows visible with triangular
+          markers (size 150). Edge lines slightly thin but visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors that are colorblind-safe (blue, yellow, cyan,
+          gray, red) - no red-green confusion issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good hierarchical layout utilizing ~60% of canvas, slight imbalance
+          with more whitespace at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for network), legend well-placed in upper right
+          with clear labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct directed network graph with nodes and directed edges
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly represent entities, edges show directed dependencies
+          from source to target
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has directed arrows, node grouping by color, hierarchical layout,
+          curved edges for bidirectional connections as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 nodes and 24 edges visible and within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps Module Type categories to colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "network-directed · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows directed edges, bidirectional pairs with curves, hierarchical
+          structure, node grouping. Minor: could show edge weights via thickness'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software package dependencies is a perfect, realistic, neutral example
+          for directed graphs
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 12 nodes is appropriate for static visualization (spec says 10-50).
+          Good number of edges (24) showing realistic dependency patterns
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, but has significant
+          computation logic for layout/curves (necessary for network graphs)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair 5.x/6.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses Altair's declarative encoding, layered charts, tooltips, and
+          angle encoding for arrow rotation. However, doesn't leverage Altair's interactivity
+          (selection, zoom/pan) which would enhance network exploration.
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/bokeh.yaml b/plots/network-directed/metadata/bokeh.yaml
index 2eacfcde02..30232ba472 100644
--- a/plots/network-directed/metadata/bokeh.yaml
+++ b/plots/network-directed/metadata/bokeh.yaml
@@ -23,3 +23,173 @@ review:
   - Edge shortening calculations could be improved for more consistent arrow-to-node
     spacing
   - No curved edges for potential bidirectional relationships
+  image_description: 'The plot displays a directed network graph showing software
+    module dependencies. It uses a circular layout with 12 nodes arranged in a roughly
+    circular pattern. Nodes are colored by group: blue for Core Modules (API, Auth,
+    Database, Models, Routes, Middleware), yellow for Infrastructure (Cache, Logger,
+    Config), teal for Shared Utils (Utils, Validators), and gray for Development (Tests).
+    Directed arrows connect the nodes showing import dependencies, pointing from source
+    to target. The title "network-directed · bokeh · pyplots.ai" appears centered
+    at the top. A legend in the upper right corner explains the color coding for the
+    four module groups. All node labels are clearly readable inside the circles.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 36pt, node labels at 22pt
+          bold, legend text at 22pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; circular layout keeps everything
+          well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes are well-sized (size=200), arrows clearly visible with appropriate
+          line_width=4
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Color scheme uses blue, yellow, teal, and gray - all distinguishable
+          and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though slight asymmetry with legend placement
+          creates minor imbalance
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid appropriately hidden for network visualization; legend is well-positioned
+          with background box
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct directed network graph with nodes and arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned, edges correctly directed from source
+          to target
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has directed arrows, node groups, clear layout; could benefit from
+          curved edges for bidirectional clarity as spec mentions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and connections visible within canvas
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes the four module groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "network-directed · bokeh · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows directed relationships, multiple groups, hierarchical dependencies;
+          could show more complex patterns like cycles
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependencies is a perfect, realistic use case mentioned
+          in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 12 nodes with 23 edges is appropriate for static visualization per
+          spec guidance (10-50 nodes)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → layout → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic layout
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using scatter() instead of circle() for nodes is fine, but could
+          use more idiomatic Bokeh patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Arrow with NormalHead, ColumnDataSource, LabelSet; saves
+          both PNG and interactive HTML which is a Bokeh strength
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/highcharts.yaml b/plots/network-directed/metadata/highcharts.yaml
index 668801f785..95e0dec585 100644
--- a/plots/network-directed/metadata/highcharts.yaml
+++ b/plots/network-directed/metadata/highcharts.yaml
@@ -24,3 +24,171 @@ review:
     font size would improve legibility
   - Significant empty space on left and right margins - nodes could be spread wider
     to better utilize canvas
+  image_description: The plot displays a directed network graph showing software module
+    dependencies. It features 12 yellow circular nodes labeled with module names (main,
+    api, auth, database, models, utils, config, cache, logging, validation, routes,
+    middleware) arranged in a hierarchical layout. Blue directional edges (arrows)
+    connect the nodes, indicating import dependencies - arrows point from the importing
+    module to the imported module. The title "network-directed · highcharts · pyplots.ai"
+    appears at the top in bold, with a subtitle "Software Module Dependencies (arrows
+    show import direction)" below it. The graph is arranged across approximately 4-5
+    layers showing the hierarchy from main at the top down to cache and utils at the
+    bottom.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and labels are readable but could be slightly larger for the
+          4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all node labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and edges clearly visible, arrows are well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow nodes with blue edges, colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good hierarchical layout but significant empty space on left/right
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs, using subtitle for context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean background, no distracting elements
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct directed network graph with arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has arrows showing direction, clear node labels, hierarchical layout
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 nodes and 22 edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this chart type, subtitle explains the visualization
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: network-directed · highcharts · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows directed edges, multiple connection patterns, hierarchy, but
+          no edge weights shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependencies is a real, practical use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 12 nodes is appropriate, some module names could be more descriptive
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → HTML generation → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed node positions ensure reproducible output
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Highcharts networkgraph module
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of custom arrow rendering, but could leverage more built-in
+          features
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/letsplot.yaml b/plots/network-directed/metadata/letsplot.yaml
index 9d71055bea..b92bf7bccc 100644
--- a/plots/network-directed/metadata/letsplot.yaml
+++ b/plots/network-directed/metadata/letsplot.yaml
@@ -28,3 +28,166 @@ review:
     complexity
   - HTML export could leverage lets-plot interactive features like tooltips showing
     module details
+  image_description: 'The plot displays a directed network graph visualizing software
+    package dependencies. At the top sits a dark blue "App" node, connected by arrows
+    to a yellow "API" node below. The API node then branches out to multiple modules:
+    Auth (yellow), Database (yellow), and Cache (green) in the middle layer. At the
+    bottom layer are HTTP Client (green), Queue (green), Config (purple), and Logger
+    (purple). A separate Utils node (purple) appears on the right side. Arrows with
+    closed triangle heads show the direction of dependencies, all using a consistent
+    gray color. The background is clean white (theme_void), and a legend on the right
+    shows four module types: application (blue), core (yellow), infrastructure (green),
+    and utility (purple).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and node labels are bold and readable, good font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; labels positioned below nodes cleanly
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are well-sized, arrows are visible with proper arrowheads;
+          slightly crowded in the middle layer
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, purple palette is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good hierarchical layout, plot fills canvas well
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right, clean void theme
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements directed network graph with arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped with source→target direction
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has nodes, directed edges with arrows, node grouping by color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 10 nodes visible within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 4 module types
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: network-directed · letsplot · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical dependencies, multiple connection patterns, various
+          node degrees; could show bidirectional edges
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software package dependency network is a perfect, realistic example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 10 nodes and 20 edges is appropriate; could have slightly more variety
+          in edge density
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Imports are appropriate but os and shutil for file moving adds complexity
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_segment with arrow(), scale_fill_manual,
+          theme_void; could use more lets-plot specific features like tooltips for
+          HTML version
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/matplotlib.yaml b/plots/network-directed/metadata/matplotlib.yaml
index e5e66b111f..aeaa4e06d1 100644
--- a/plots/network-directed/metadata/matplotlib.yaml
+++ b/plots/network-directed/metadata/matplotlib.yaml
@@ -24,3 +24,166 @@ review:
   weaknesses:
   - Some node labels are slightly cramped within circles (e.g., validators, test_routes)
   - Node radius could be slightly larger to better accommodate longer labels
+  image_description: 'The plot displays a directed network graph representing software
+    module dependencies. It shows 13 nodes (modules) connected by curved arrows indicating
+    import direction. The nodes are colored by group: Core modules (main, config,
+    database) in Python blue (#306998), Api modules (auth, routes, handlers) in Python
+    yellow (#FFD43B), Utils modules (validators, helpers, logger, cache) in green
+    (#4DAF4A), and Tests modules (test_auth, test_routes, test_db) in purple (#984EA3).
+    The main module sits at the top with arrows flowing downward to its dependencies.
+    Arrows are gray with appropriate arrowheads showing direction clearly. A legend
+    in the upper-left explains the color coding by Module Type. The title "network-directed
+    · matplotlib · pyplots.ai" appears at the top in bold.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt is excellent, node labels at 13pt are readable but
+          could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements, nodes well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and arrows clearly visible, arrows well-sized with appropriate
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, green, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, nodes distributed across the plot
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed upper-left, no grid needed for network graph
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct directed network graph with arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes represent modules, edges show import direction
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Arrows clearly visible, curved edges for visual clarity, node groups
+          colored
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 13 nodes and 22 edges fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to module types
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: network-directed · matplotlib · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows directed dependencies, multiple node groups, varying connectivity
+          levels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software package dependencies is an excellent real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 13 nodes is ideal for static visualization per spec (10-50 range)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed here)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.pyplot, numpy, patches)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of FancyArrowPatch with connectionstyle for curved arrows,
+          Circle patches for nodes
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/plotly.yaml b/plots/network-directed/metadata/plotly.yaml
index f3ca369fa4..3a9c023c88 100644
--- a/plots/network-directed/metadata/plotly.yaml
+++ b/plots/network-directed/metadata/plotly.yaml
@@ -25,3 +25,181 @@ review:
     within their circular markers
   - The graph does not include bidirectional edges to demonstrate curved edge handling
     mentioned in spec notes
+  image_description: 'The plot displays a directed network graph showing software
+    module dependencies. 13 nodes are arranged in a circular layout, each containing
+    a module name (main, api, auth, database, models, utils, config, logging, cache,
+    router, middleware, validators, schemas). Nodes are color-coded by group: Python
+    Blue (#306998) for Entry (main), Python Yellow (#FFD43B) for Core modules (api,
+    auth, database, cache, router, middleware), Teal (#4ECDC4) for Data modules (models,
+    validators, schemas), and Gray (#95A5A6) for Helpers (utils, config, logging).
+    Directed edges connect nodes with gray lines and visible arrowheads pointing toward
+    the target node. The title reads "Software Module Dependencies · network-directed
+    · plotly · pyplots.ai" at the top. A legend titled "Module Groups" appears on
+    the right side listing Entry, Core, Data, and Helpers with their respective colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend text are clear and readable; node labels are legible
+          but some ("middleware", "validators", "schemas") are slightly cramped within
+          nodes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; nodes well-spaced in circular layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and edges are clearly visible; arrows are appropriately sized;
+          some edges cross through center making paths harder to trace
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, teal, gray) are colorblind-safe
+          with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Circular layout is centered, good use of canvas space, legend positioned
+          appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Not applicable for network graphs (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend is well-styled with title and clear
+          entries
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct directed network graph with visible arrow direction
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped; arrows point from source to target
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has nodes with IDs/labels, directed edges with arrows, group coloring
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 13 nodes visible, all 19 edges displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all four module groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Software Module Dependencies · network-directed
+          · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows directed edges, node grouping, and asymmetric relationships;
+          could benefit from bidirectional edges to demonstrate the curved edge handling
+          mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependency is a perfect real-world scenario matching
+          spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 13 nodes is within recommended 10-50 range; dependency relationships
+          are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → layout → figure → traces
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also creates plot.html (which is fine for plotly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter and annotations for arrows; could leverage hover
+          interactivity more; HTML export demonstrates interactive capability
+  verdict: APPROVED
diff --git a/plots/network-directed/metadata/seaborn.yaml b/plots/network-directed/metadata/seaborn.yaml
index fd7fdd0016..0d7565071f 100644
--- a/plots/network-directed/metadata/seaborn.yaml
+++ b/plots/network-directed/metadata/seaborn.yaml
@@ -26,3 +26,171 @@ review:
     may not be rendering in final image
   - Node label font size at 14pt is acceptable but 16pt would improve readability
     at full resolution
+  image_description: 'The plot displays a directed network graph representing software
+    module dependencies. It features 11 circular nodes arranged in a semi-hierarchical
+    layout with "app" at the top. Nodes are color-coded by module type: blue (Core:
+    app, config), yellow (Data: db, cache, model), green (Services: api, auth, router,
+    mware), and coral/red (Utils: log, valid). Directed arrows connect nodes showing
+    dependencies, with curved edges and arrow heads indicating direction. The arrows
+    vary in thickness based on dependency weight. The title "network-directed · seaborn
+    · pyplots.ai" appears at the top in bold. A legend in the upper left shows the
+    four module type categories. The background is a light gray (#fafafa) with white
+    node borders providing good contrast.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 24pt and clear, node labels at 14pt bold are readable but
+          could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements, curved edges prevent arrow collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes are large (s=3000) and clearly visible, edge weights differentiated
+          by thickness
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, coral) are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slightly more whitespace on right side
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed in upper left, clean design with module types
+          shown
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct directed network graph with arrows indicating direction
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned, edges properly directed from source to
+          target
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has directed arrows, node grouping, edge weights shown via thickness,
+          curved edges for clarity
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 11 nodes and 19 edges visible within canvas bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all four module types with accurate colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows directed edges, multiple groups, varying weights, but edge
+          weight legend was mentioned in code but not visible in image
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software module dependencies is a highly relevant, realistic use
+          case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: 11 nodes is within spec range (10-50), but could show more complex
+          dependencies
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.patches, plt, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn 0.13+ API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct settings
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot for nodes with hue grouping, sns.set_theme for
+          styling, and seaborn color palette. However, network graphs are not a seaborn
+          specialty, so implementation relies heavily on matplotlib patches.
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/altair.yaml b/plots/network-force-directed/metadata/altair.yaml
index a3097c05a5..2c8105e6bb 100644
--- a/plots/network-force-directed/metadata/altair.yaml
+++ b/plots/network-force-directed/metadata/altair.yaml
@@ -25,3 +25,183 @@ review:
   - Some Hub labels overlap in dense areas (particularly in the Engineering cluster)
   - Edge opacity is quite low (0.4) making some connections difficult to trace visually
   - Generates unnecessary plot.html file in addition to plot.png
+  image_description: 'The plot displays a force-directed network graph with 50 nodes
+    arranged in three distinct community clusters. The nodes are colored by team membership:
+    blue (Engineering) on the right side, yellow (Marketing) in the lower-left area,
+    and coral/salmon red (Sales) in the upper-left area. Gray edges connect nodes
+    within and between communities. Node sizes vary based on degree (number of connections),
+    with larger nodes having more connections. High-degree nodes are labeled "Hub"
+    in dark text above them. The title "network-force-directed · altair · pyplots.ai"
+    appears at the top. A legend labeled "Teams" appears in the upper-right corner
+    with color-coded entries for each community. The layout clearly reveals the three-community
+    structure with visible bridge connections between clusters.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and readable (~28pt), legend labels are clear. Hub
+          labels are slightly small but legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor overlap of some Hub labels in the dense Engineering cluster
+          on the right side.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Node sizes are well-adapted with visible variation by degree. Alpha
+          0.85 provides good visibility with subtle transparency.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral are colorblind-safe and highly distinguishable.
+          Excellent contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, three clusters well-distributed across the
+          space. Plot fills ~60% of canvas.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graph (axes hidden appropriately).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: 'Legend placed well in upper-right, but no grid needed for network
+          graphs. However, the edge color is very light (gray #AAAAAA with 0.4 opacity)
+          making some edges hard to trace.'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed graph visualization.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Node positions correctly computed via Fruchterman-Reingold algorithm.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has nodes, edges, community coloring, node size by degree, hub labels.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 50 nodes and edges visible within the canvas.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Engineering, Marketing, Sales with matching
+          colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "network-force-directed · altair · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows communities, hub nodes, varying degrees, bridge connections.
+          Could have shown weighted edges for complete coverage.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with Engineering, Marketing, Sales teams is a realistic
+          organizational scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 50 nodes is appropriate. Intra-community edge probability of 0.3
+          is reasonable. Bridge edges are manually defined but sparse, which is realistic.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → seed → data generation → force layout → Altair
+          chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only uses altair, numpy, pandas - all necessary.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png (correct) but also saves plot.html. Minor deduction
+          for unnecessary output.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layered composition (edges + nodes + labels), mark_circle
+          with encoding, Color scale with domain/range, tooltips. However, doesn't
+          use Altair's more distinctive features like selections for interactivity
+          or compound marks.
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/bokeh.yaml b/plots/network-force-directed/metadata/bokeh.yaml
index dbb02d1cd0..4066f94f2e 100644
--- a/plots/network-force-directed/metadata/bokeh.yaml
+++ b/plots/network-force-directed/metadata/bokeh.yaml
@@ -24,3 +24,184 @@ review:
   - Legend placement could be integrated better within the plot area rather than added
     as separate layout
   - Some peripheral nodes appear quite small at the edges of communities
+  image_description: 'The plot shows a force-directed network graph with 50 nodes
+    divided into 3 communities: Engineering (coral/red), Marketing (yellow), and Sales
+    (blue). The three communities are visually separated across the canvas - Engineering
+    in the upper left area, Marketing in the lower middle area, and Sales in the right
+    area. Nodes are connected by light gray edges. Hub nodes (with 7+ connections)
+    are labeled with "Hub" text above them. Node sizes vary based on their degree
+    (number of connections). The title "network-force-directed · bokeh · pyplots.ai"
+    appears at the top. A legend labeled "Teams" showing the three community colors
+    is positioned in the upper left. The white background is clean with no axes or
+    grid lines. Bokeh toolbar icons are visible in the upper right corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt is clearly readable, legend text at 18pt and 20pt is
+          legible, "Hub" labels at 14pt are appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; hub labels positioned above nodes with adequate
+          spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes are well-sized with degree-based scaling (15 + degree*3), slightly
+          smaller nodes could be a bit larger for optimal visibility at edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Three distinct colors (blue, yellow, coral) are colorblind-safe and
+          clearly distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; communities are well-separated but slightly
+          clustered toward center-right, leaving some empty space in corners
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs; appropriately no axes shown (axes hidden
+          as expected)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is functional but positioned via `add_layout` to "left" placing
+          it in upper-left corner; it works but could integrate better with the plot
+          area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed graph implementation using Fruchterman-Reingold
+          algorithm
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned using force simulation; edges connect
+          source to target
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: nodes, edges, community coloring, node
+          size scaling by degree, hub labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full network visible within canvas bounds (normalized to [0.05, 0.95])
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows three teams with accurate colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "network-force-directed · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community structure, hub nodes, varying node degrees, inter-community
+          bridges; could show more variation in community sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with Engineering/Marketing/Sales teams is a real,
+          comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 50 nodes with 3 communities is appropriate; edge density (~30% intra-community)
+          creates readable structure
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code follows linear structure but is quite long at 199 lines; could
+          be more concise
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set at the beginning'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh.io, bokeh.models, bokeh.plotting)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png AND plot.html (both are correct for Bokeh)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool with tooltips, Legend with custom
+          items, segment glyphs for edges, interactive toolbar; good use of Bokeh's
+          interactive features
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/highcharts.yaml b/plots/network-force-directed/metadata/highcharts.yaml
index f008e8fae1..e57a48cc1f 100644
--- a/plots/network-force-directed/metadata/highcharts.yaml
+++ b/plots/network-force-directed/metadata/highcharts.yaml
@@ -26,3 +26,175 @@ review:
   - Some node label overlaps in dense clusters (Finn/Carol/Quinn area)
   - Uses raw JavaScript literals instead of highcharts-core Python API (works but
     not idiomatic)
+  image_description: 'The plot displays a force-directed network graph with 32 nodes
+    representing people organized into 4 communities. Nodes are colored by community:
+    Tech (blue), Marketing (yellow), Finance (purple), and Design (cyan). Each node
+    is labeled with a person''s name. The nodes are connected by gray edges showing
+    relationships. The layout algorithm has positioned nodes organically with connected
+    nodes clustering together. The title "network-force-directed · highcharts · pyplots.ai"
+    appears at the top with a subtitle explaining the community color coding. The
+    network is centered on the canvas with substantial white space around it.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are readable, though some node labels are slightly
+          small for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor overlap in some dense areas (e.g., Finn/Carol/Quinn cluster),
+          but most labels readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Nodes are well-sized with large markers (radius: 45), edges clearly
+          visible'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Colorblind-safe palette: blue, yellow, purple, cyan - no red-green
+          issues'
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Network is centered but occupies only ~30-40% of canvas, significant
+          whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs, but subtitle provides legend explanation
+      - id: VQ-07
+        name: Grid & Legend
+        score: 3
+        max: 2
+        passed: true
+        comment: Subtitle serves as legend, clean background - giving 2/2
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed network graph using Highcharts networkgraph
+          module
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows communities, node labels, force-directed layout with physics
+          simulation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 32 nodes and edges visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Subtitle explains colors but no formal legend with markers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: network-force-directed · highcharts · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community structure, inter/intra community connections, central
+          nodes visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with named people across departments is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 32 nodes in 4 communities is appropriate, sensible connection density
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses random.seed(42) for deterministic layout
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses raw JS literal construction instead of highcharts-core Python
+          API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Highcharts networkgraph module with physics simulation (verlet
+          integration, gravitational constants, friction, link length)
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/letsplot.yaml b/plots/network-force-directed/metadata/letsplot.yaml
index f3c7a164b6..0839c4d066 100644
--- a/plots/network-force-directed/metadata/letsplot.yaml
+++ b/plots/network-force-directed/metadata/letsplot.yaml
@@ -29,3 +29,187 @@ review:
     optional weight)
   - HTML export could leverage lets-plot interactive features (tooltips showing node/edge
     info)
+  image_description: 'The plot displays a force-directed network graph with 50 nodes
+    organized into 3 distinct community clusters. The nodes are colored by team: blue
+    (#306998) for Engineering (right side), yellow (#FFD43B) for Marketing (center-bottom),
+    and coral-red (#FF6B6B) for Sales (upper-left). Node sizes vary based on their
+    degree (number of connections), with more connected nodes appearing larger. Gray
+    edges (alpha ~0.5) connect the nodes, showing both dense intra-community connections
+    and sparse inter-community bridge edges. The title "network-force-directed · letsplot
+    · pyplots.ai" appears at the top in bold. A legend labeled "Teams" is positioned
+    on the left side. The layout successfully reveals the three distinct communities
+    with clear separation, demonstrating the force-directed algorithm''s ability to
+    cluster connected nodes together.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (28pt), bold, and perfectly readable. Legend text
+          is appropriately sized (14-16pt).
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap. Nodes have some visual overlap but this is acceptable
+          and even desirable for showing dense clusters.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Nodes are well-sized and vary by degree. Edges are visible with
+          appropriate alpha. Minor deduction: some smaller nodes in dense areas could
+          be slightly more visible.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral-red are easily distinguishable and colorblind-safe
+          (no red-green as only distinction).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas. Plot fills approximately 50-60% of the area.
+          Slight imbalance with more whitespace on the right side.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs; axes are correctly hidden with element_blank().
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed on the left. No grid (appropriate for network).
+          However, legend is somewhat isolated from the main content.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed graph layout with physics simulation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes correctly positioned by force simulation, edges connect source/target
+          pairs.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has node size by degree, community coloring, edge visualization,
+          and proper force-directed layout algorithm.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and edges visible within the plot bounds (0.05 to 1.05
+          range).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies the three teams.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows the exact format: "network-force-directed · letsplot
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community structure, varying node degrees, inter-community
+          bridges, and dense intra-community connections. Could show edge weights
+          more explicitly.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Social network with Engineering, Marketing, and Sales teams is a
+          realistic corporate scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: '50 nodes is within recommended range. Edge density is reasonable.
+          Minor: some communities appear slightly more connected than others.'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → algorithm → dataframes → plot
+          → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Minor: ggsave import path could use lets_plot.export.ggsave more
+          explicitly (currently works but pattern varies).'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar (ggplot + geom_*), scale_color_manual, coord_fixed,
+          theme customization. Could leverage more lets-plot specific features like
+          tooltips or interactivity in the HTML export.
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/matplotlib.yaml b/plots/network-force-directed/metadata/matplotlib.yaml
index f6f90b9cfc..6639d0c877 100644
--- a/plots/network-force-directed/metadata/matplotlib.yaml
+++ b/plots/network-force-directed/metadata/matplotlib.yaml
@@ -24,3 +24,179 @@ review:
   weaknesses:
   - Minor text overlap where two hub labels partially overlap their respective nodes
   - Python 3.10+ specific code (strict=True in zip) may cause compatibility issues
+  image_description: 'The plot displays a force-directed network graph with 50 nodes
+    organized into 3 distinct community clusters. The three communities are visually
+    separated: Engineering (blue, #306998) on the right side, Marketing (yellow, #FFD43B)
+    in the bottom-left area, and Sales (coral/salmon, #E07B53) in the upper-left/center
+    region. Nodes are sized by their degree (number of connections), with larger nodes
+    representing more connected "hub" individuals. Six hub nodes are labeled with
+    "Hub (E)", "Hub (M)", and "Hub (S)" indicating the top-connected nodes in each
+    team. Edges are rendered as semi-transparent gray lines connecting nodes. The
+    title "network-force-directed · matplotlib · pyplots.ai" appears at the top in
+    bold. A legend in the upper-left corner identifies the three team colors. Axes
+    are turned off, giving a clean network visualization appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt is clearly readable, hub labels at 14pt bold are legible,
+          legend text at 16-18pt is excellent
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor overlap on two hub labels (one "Hub (S)" partially overlaps
+          a node, one "Hub (E)" overlaps its node), but generally readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Node sizes are well-adapted (450-1000+ range based on degree), edges
+          at alpha=0.35 provide good visibility without cluttering
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral are distinguishable for all types of color
+          vision (no red-green only distinction)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins (0.08-0.92 range), balanced
+          whitespace
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for network), legend well-placed in upper-left
+          with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed graph with physics-based layout (Fruchterman-Reingold
+          algorithm)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly represented; community structure clearly
+          visible
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: node sizing by degree, community colors,
+          labels for key nodes, appropriate edge styling'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 50 nodes visible, layout normalized to full view range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows three teams with accurate colors and labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "network-force-directed · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows communities, bridges between communities, degree-based sizing,
+          hub identification. Could show edge weights but spec lists them as optional
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company departments (Engineering, Marketing, Sales) is a plausible
+          real-world scenario for social network visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 50 nodes is within spec's recommended 20-200 range; edge probabilities
+          create realistic community density (30% intra, sparse inter)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data generation → force layout → plotting
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) at the start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, and LineCollection are imported and
+          all are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses strict=True in zip() which requires Python 3.10+ (minor compatibility
+          note)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300 and bbox_inches='tight'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of matplotlib's LineCollection for efficient edge rendering,
+          proper zorder layering, scatter with edgecolors styling, custom legend with
+          handles
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/plotly.yaml b/plots/network-force-directed/metadata/plotly.yaml
index 3535cd629b..bd8e5b36f3 100644
--- a/plots/network-force-directed/metadata/plotly.yaml
+++ b/plots/network-force-directed/metadata/plotly.yaml
@@ -26,3 +26,176 @@ review:
     readability
   - The plot could utilize slightly more canvas area - there is unused whitespace
     on the left and right edges
+  image_description: 'The plot displays a force-directed network graph with 50 nodes
+    organized into three distinct communities. The title "network-force-directed ·
+    plotly · pyplots.ai" is centered at the top. Three communities are visible: **Engineering**
+    (dark blue nodes, right side), **Marketing** (yellow nodes, bottom-center), and
+    **Sales** (coral/salmon red nodes, top-center). Nodes vary in size based on their
+    connection count, with larger nodes indicating more connections. Gray edges connect
+    related nodes, with dense intra-community connections and sparse inter-community
+    bridges visible. "Hub" labels mark high-degree nodes throughout the graph. A legend
+    titled "Teams" appears in the upper-left corner with a white background. The layout
+    uses a clean white background with no axes or gridlines, appropriate for network
+    visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt is clearly readable, legend text at 18pt is legible,
+          Hub labels are visible
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor Hub label overlaps in dense cluster areas, but main content
+          readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Nodes sized 20-65px with scaling by degree, edges at appropriate
+          opacity (0.4)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral are colorblind-distinguishable (no red-green
+          conflict)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot uses ~60% of canvas, slight unused space on
+          edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs, axes correctly hidden
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed with clear border; grid and axes are hidden,
+          as expected for a network graph
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed graph layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly represented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Community structure, node degree sizing, edge connections all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 50 nodes as specified, all visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match community names accurately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `{spec-id} · {library} · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community clusters, hub nodes, bridge connections; could show
+          edge weights
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Corporate team network is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 50 nodes appropriate, bridge edges sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → algorithm → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with hover text, marker styling, and HTML export;
+          could leverage Plotly animation or network-specific traces
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/plotnine.yaml b/plots/network-force-directed/metadata/plotnine.yaml
index ff9ff0cb14..06e33336d1 100644
--- a/plots/network-force-directed/metadata/plotnine.yaml
+++ b/plots/network-force-directed/metadata/plotnine.yaml
@@ -27,3 +27,179 @@ review:
   - Legend sorts departments alphabetically rather than by spatial position or logical
     grouping
   - Yellow color for Design department may have lower contrast against white background
+  image_description: 'The plot displays a force-directed network graph representing
+    team collaboration across 4 departments. The graph shows 40 nodes (circles) colored
+    by department: Engineering (blue/steel blue), Design (yellow/gold), Marketing
+    (green), and Sales (orange/coral). Nodes are sized by degree (number of connections),
+    with more connected individuals appearing larger. The departments form distinct
+    clusters with dense internal connections (solid gray lines) and sparse cross-department
+    bridges (dashed gray lines). The title reads "Team Collaboration · network-force-directed
+    · plotnine · pyplots.ai" at the top. A legend in the upper-left corner identifies
+    the four departments. The background is white with no axes or grid lines, appropriate
+    for network visualization. The layout successfully shows community structure with
+    Engineering (left), Design/Marketing (center), and Sales (right) naturally separated.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title is large and readable, legend text is clear. Minor deduction:
+          legend title could be slightly larger'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; nodes and edges are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Node sizes are appropriate, edges visible with good alpha values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color choices with distinct hues, though the blue and green
+          may be hard to distinguish for some colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, network fills the space well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for network graphs; axes appropriately hidden
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed but uses alphabetical order (Design, Engineering,
+          Marketing, Sales) rather than spatial arrangement
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed network graph
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes and edges correctly mapped with positions calculated via physics
+          simulation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Implements all spec features: force-directed layout, node sizing
+          by degree, edge weighting, community detection'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and edges visible within plot bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies the 4 departments
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: spec-id · library · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows clusters, bridges, varying node degrees. Minor: could show
+          more variation in bridge connection strengths'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Team collaboration network is a perfect real-world scenario for force-directed
+          graphs
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 40 nodes across 4 departments is appropriate. Scale is good but bridge
+          connections could be more varied
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → algorithm → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' which is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_segment and geom_point layering,
+          scale_color_manual, and theme customization. The force-directed algorithm
+          is implemented manually rather than using any library-specific features
+          (which plotnine doesn't have for networks).
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/pygal.yaml b/plots/network-force-directed/metadata/pygal.yaml
index b1754a2160..e3c900a318 100644
--- a/plots/network-force-directed/metadata/pygal.yaml
+++ b/plots/network-force-directed/metadata/pygal.yaml
@@ -15,3 +15,176 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot displays a force-directed network graph with 50 nodes
+    organized into 3 clearly visible communities. The title "network-force-directed
+    · pygal · pyplots.ai" is displayed at the top. Nodes are colored by community:
+    **blue** (Sales) cluster on the right side, **yellow** (Marketing) cluster in
+    the lower-center, and **red/coral** (Engineering) cluster in the upper-left. Gray
+    lines connect nodes representing edges/relationships. The communities are visually
+    separated with sparse bridge connections between them. A legend at the bottom
+    identifies the four series: "Connections", "Engineering", "Marketing", and "Sales".
+    The layout shows the force-directed algorithm successfully clustering related
+    nodes together.'
+  criteria_checklist:
+    visual_quality:
+      score: 32
+      max: 35
+      items:
+      - id: VQ-01
+        name: Axis labels
+        score: 7
+        max: 7
+        passed: true
+        comment: Appropriately hidden (not applicable for network graphs)
+      - id: VQ-02
+        name: No overlapping text
+        score: 6
+        max: 6
+        passed: true
+        comment: Clean layout, no text overlap
+      - id: VQ-03
+        name: Color choice
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, red are distinguishable and colorblind-safe
+      - id: VQ-04
+        name: Element clarity
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes are clearly visible with appropriate dot size (25)
+      - id: VQ-05
+        name: Layout balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, communities well separated
+      - id: VQ-06
+        name: Grid subtlety
+        score: 3
+        max: 3
+        passed: true
+        comment: Grid appropriately disabled for network visualization
+      - id: VQ-07
+        name: Legend placement
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend at bottom, does not obscure data
+      - id: VQ-08
+        name: Image size
+        score: 0
+        max: 2
+        passed: true
+        comment: 4800x2700 specified but aspect ratio appears slightly off
+    spec_compliance:
+      score: 33
+      max: 35
+      items:
+      - id: SC-01
+        name: Correct plot type
+        score: 10
+        max: 10
+        passed: true
+        comment: Force-directed network graph correctly implemented
+      - id: SC-02
+        name: Data mapped correctly
+        score: 7
+        max: 7
+        passed: true
+        comment: Nodes and edges properly positioned via physics simulation
+      - id: SC-03
+        name: Required features present
+        score: 7
+        max: 7
+        passed: true
+        comment: Shows communities, connections, force-directed layout
+      - id: SC-04
+        name: Data range
+        score: 4
+        max: 4
+        passed: true
+        comment: All nodes visible with appropriate padding
+      - id: SC-05
+        name: Legend accuracy
+        score: 2
+        max: 4
+        passed: true
+        comment: Legend shows series but could be more descriptive
+      - id: SC-06
+        name: Title format
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `{spec-id} · {library} · pyplots.ai` format correctly
+    data_quality:
+      score: 14
+      max: 15
+      items:
+      - id: DQ-01
+        name: Feature coverage
+        score: 5
+        max: 6
+        passed: true
+        comment: Shows communities, varying node degrees, bridge connections; node
+          size by degree mentioned in spec but not implemented
+      - id: DQ-02
+        name: Realistic context
+        score: 5
+        max: 5
+        passed: true
+        comment: Social network with Engineering/Marketing/Sales teams is plausible
+      - id: DQ-03
+        name: Appropriate scale
+        score: 4
+        max: 4
+        passed: true
+        comment: 50 nodes is appropriate, edge density is reasonable
+    code_quality:
+      score: 13
+      max: 15
+      items:
+      - id: CQ-01
+        name: KISS structure
+        score: 4
+        max: 4
+        passed: true
+        comment: Sequential structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Library idioms
+        score: 3
+        max: 3
+        passed: true
+        comment: Proper pygal XY chart usage with Style
+      - id: CQ-04
+        name: Clean imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported
+      - id: CQ-05
+        name: Helpful comments
+        score: 0
+        max: 1
+        passed: true
+        comment: Comments present but minimal
+      - id: CQ-06
+        name: No deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API used
+      - id: CQ-07
+        name: Output correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png but also saves plot.svg and plot.html (extra files)
+  verdict: APPROVED
diff --git a/plots/network-force-directed/metadata/seaborn.yaml b/plots/network-force-directed/metadata/seaborn.yaml
index 511ddbfaec..052ca478a5 100644
--- a/plots/network-force-directed/metadata/seaborn.yaml
+++ b/plots/network-force-directed/metadata/seaborn.yaml
@@ -26,3 +26,182 @@ review:
   - Grid is absent but could be argued as appropriate for network visualization
   - Force layout is custom code rather than leveraging a network-specific library
     feature
+  image_description: 'The plot displays a force-directed network graph showing an
+    organizational social network with 37 nodes distributed across 3 departments.
+    The nodes are colored by department: blue (#306998) for Engineering (15 nodes),
+    yellow (#FFD43B) for Marketing (12 nodes), and red (#E74C3C) for Sales (10 nodes).
+    Node sizes vary based on connection degree - larger nodes indicate more connections
+    (hub nodes). Gray edges connect nodes with varying thickness based on connection
+    weight. The layout clearly shows community clustering with Engineering nodes primarily
+    in the upper-left, Marketing nodes in the upper-right, and Sales nodes scattered
+    in the lower portion. High-degree hub nodes are labeled (Node 0, 1, 3, 4, 6, etc.).
+    A legend in the upper-left identifies departments with node counts. The title
+    "network-force-directed · seaborn · pyplots.ai" appears at the top. Network statistics
+    (Nodes: 37 | Edges: 98 | Avg Degree: 5.3) are shown at the bottom. Axis labels
+    indicate "Force-Directed X/Y Position" but tick marks are removed for cleaner
+    visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, legend 16pt+, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor overlap between some node labels (e.g., Node 7 and Node 14
+          area), but largely readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Node sizes well-adapted to network density, edges visible with appropriate
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/red palette is colorblind-safe (deuteranopia and protanopia
+          friendly)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, network fills ~70% of available space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (force-directed positions are unitless,
+          so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed; no grid is shown, which is appropriate for
+          a network visualization
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct force-directed network graph using Fruchterman-Reingold algorithm
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Nodes positioned by force simulation, edges connect correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: node size by degree, edge thickness by
+          weight, community structure, hub labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All nodes and edges visible within plot bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows department names with node counts
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but uses middle dot (·) instead of standard format,
+          minor
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows community structure, hub nodes, varying connection densities;
+          could show more inter-community bridges
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: organization with Engineering/Marketing/Sales
+          departments'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 37 nodes, 98 edges, avg degree 5.3 - all realistic for organizational
+          network
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → force layout → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot with hue/size mapping and sns.set_theme, but
+          force layout is custom numpy code rather than a seaborn-specific feature
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/altair.yaml b/plots/parallel-basic/metadata/altair.yaml
index b1d62cd139..799171ecb1 100644
--- a/plots/parallel-basic/metadata/altair.yaml
+++ b/plots/parallel-basic/metadata/altair.yaml
@@ -25,3 +25,182 @@ review:
     or click interactions) that would make the parallel coordinates plot more exploratory
   - Dashed grid lines add visual noise in a line-heavy parallel coordinates visualization;
     a cleaner background would improve readability
+  image_description: 'The plot displays a parallel coordinates visualization with
+    four vertical axes representing Iris flower measurements: Sepal Length (cm), Sepal
+    Width (cm), Petal Length (cm), and Petal Width (cm). Each axis shows normalized
+    values from 0 to 1. Three species are color-coded: Setosa (blue #306998), Versicolor
+    (yellow #FFD43B), and Virginica (pink/coral #E85D75). 150 lines (50 per species)
+    traverse across all four axes, with transparency (0.6 opacity) allowing overlapping
+    patterns to be visible. The title "parallel-basic · altair · pyplots.ai" appears
+    at the top center in gray. A legend on the right identifies the species with colored
+    line markers. A subtle dashed grid (0.3 opacity) provides reference. The species
+    show distinct clustering patterns: Setosa (blue) has notably lower Petal Length
+    and Petal Width values, while Virginica (pink) tends toward higher values on those
+    dimensions.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 30pt, axis labels at 20pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, axis labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Lines at 2.5 strokeWidth with 0.6 opacity work well for 150 observations,
+          though some middle sections get dense
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral pink are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart uses canvas well at 1500×850 with scale_factor=3.0, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Normalized Value" on Y-axis with units implied, dimension names
+          include "(cm)" units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed on the right; however, the grid uses gridDash
+          which creates visual noise in a parallel coordinates plot where the lines
+          are the focus
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dimensions correctly mapped to X-axis, values to Y-axis, categories
+          to color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization, color coding by category, transparency for overlap
+          handling all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-1 normalized range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all three species with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "parallel-basic · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows distinct species clustering patterns, overlapping regions,
+          and clear separation on Petal dimensions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, well-understood real-world scenario for
+          parallel coordinates
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Measurements are realistic for Iris flowers (sepal 4.5-7cm, petal
+          1-6cm)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data generation → transformation
+          → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: While the implementation works, it doesn't leverage Altair's distinctive
+          features like interactive selections, tooltips, or the grammar-of-graphics
+          layering capabilities that make Altair unique
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/bokeh.yaml b/plots/parallel-basic/metadata/bokeh.yaml
index 86355d0f3b..a4de892982 100644
--- a/plots/parallel-basic/metadata/bokeh.yaml
+++ b/plots/parallel-basic/metadata/bokeh.yaml
@@ -25,3 +25,179 @@ review:
   - Missing HoverTool for interactive exploration of individual data points
   - Legend font size could be larger for the 4800x2700 canvas
   - Could benefit from vertical axis lines to better delineate dimensions
+  image_description: 'The plot displays a parallel coordinates visualization with
+    4 vertical axes representing Iris flower measurements: Sepal Length (4.3-7.9 cm),
+    Sepal Width (2.0-4.3 cm), Petal Length (1.1-6.6 cm), and Petal Width (0.1-2.5
+    cm). Each observation is drawn as a line connecting normalized values (0-1) across
+    all axes. Three species are distinguished by color: Setosa (blue #306998), Versicolor
+    (yellow #FFD43B), and Virginica (green #4CAF50). The visualization clearly shows
+    clustering patterns - Setosa lines cluster at low values for petal dimensions
+    while remaining moderate for sepal dimensions. Virginica shows consistently high
+    values across petal measurements. The legend is positioned in the top right corner
+    with "Species" as the title. The title "parallel-basic · bokeh · pyplots.ai" appears
+    in the top left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable at the output resolution,
+          though axis labels with ranges are slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are visible with appropriate alpha (0.5), though density makes
+          individual lines harder to follow in congested areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green palette is colorblind-friendly with good
+          distinction
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, though the plot area could be slightly larger
+          relative to margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (cm) and value ranges included
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend is well-placed but could use slightly
+          larger font
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All 4 dimensions correctly mapped to vertical axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization applied, color coding by category, transparency for
+          overlapping lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes, proper 0-1 normalization
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three species with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format `parallel-basic · bokeh · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clustering patterns and species separation well, though some
+          overlap between Versicolor and Virginica could be more pronounced
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris-like measurements are a classic, realistic dataset for parallel
+          coordinates
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for Iris flowers, though some generated values
+          slightly exceed typical ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic Bokeh plotting functions (figure, line) but doesn't leverage
+          Bokeh's interactive capabilities or HoverTool which would enhance a parallel
+          coordinates visualization
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/highcharts.yaml b/plots/parallel-basic/metadata/highcharts.yaml
index cca90f9b57..3c23753194 100644
--- a/plots/parallel-basic/metadata/highcharts.yaml
+++ b/plots/parallel-basic/metadata/highcharts.yaml
@@ -24,3 +24,143 @@ review:
   weaknesses:
   - Y-axis tick label font size could be slightly larger for better readability at
     a glance
+  image_description: 'The plot displays a parallel coordinates visualization of the
+    Iris dataset with 4 vertical axes: Sepal Length (cm), Sepal Width (cm), Petal
+    Length (cm), and Petal Width (cm). Three species are color-coded: Setosa (blue),
+    Versicolor (yellow/gold), and Virginica (purple). Each species has 10 polylines
+    connecting values across the axes. The Setosa lines cluster at the bottom of the
+    Petal Length and Petal Width axes, clearly separating from the other species.
+    The title "Iris Dataset · parallel-basic · highcharts · pyplots.ai" appears at
+    the top. A horizontal legend at the bottom identifies the three species. The layout
+    is clean with good margins and readable text.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable; tick labels slightly small but
+          legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are well-visible with appropriate transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/purple palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes labeled with units (cm)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, axes visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 4 dimensions correctly mapped to parallel axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Color coding by category, transparency for overlap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data ranges properly shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Three species correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses {description} · {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Clear species separation visible, shows clustering and variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic Iris dataset, real botanical measurements
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Authentic Iris measurement values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic hardcoded data, no random seed needed
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/letsplot.yaml b/plots/parallel-basic/metadata/letsplot.yaml
index 4f67295576..98cc4d0b86 100644
--- a/plots/parallel-basic/metadata/letsplot.yaml
+++ b/plots/parallel-basic/metadata/letsplot.yaml
@@ -23,3 +23,175 @@ review:
   - Tick value labels (min/max) positioned slightly far from axes and could be larger
   - Yellow color for Versicolor could have more saturation for better visibility against
     white
+  image_description: 'The plot displays a parallel coordinates visualization of the
+    Iris dataset with 4 dimensions (Sepal Length, Sepal Width, Petal Length, Petal
+    Width). Four vertical black axes are evenly spaced across the canvas. 30 observation
+    lines (10 per species) connect values across axes. Lines are colored by species:
+    blue for Setosa, yellow for Versicolor, and red for Virginica. Each axis has min/max
+    value labels on the left side. Dimension labels appear below each axis with units
+    in cm. The title "parallel-basic · letsplot · pyplots.ai" appears in the top-left.
+    A legend in the right portion identifies the three species. The background is
+    clean white with no grid. Clear visual separation exists between species clusters,
+    especially for Setosa (blue lines clustered at low petal values).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and dimension labels clearly readable; tick value labels slightly
+          small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; axis labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines clearly visible with good alpha (0.7); line width appropriate
+          for 30 observations
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/red scheme has good contrast; yellow-on-white could be
+          slightly brighter
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend appropriately
+          positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (cm) for all dimensions
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed; no grid (appropriate for this plot type), but
+          axis lines could be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four dimensions correctly mapped to vertical axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalized axes, color by category, transparency for overlapping
+          lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Min/max values displayed for each axis showing full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species legend correctly identifies the three Iris categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exactly matches required format: parallel-basic · letsplot · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear cluster separation between species; demonstrates pattern
+          identification capability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic Iris dataset - perfect real-world scenario for parallel coordinates
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Authentic Iris measurements; normalized 0-1 range appropriate
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure but uses some helper loops for data transformation
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic hardcoded data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves both plot.png and plot.html (minor: path=''.'' argument unusual)'
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ggplot2 grammar with geom_line, geom_segment, geom_text,
+          scale_color_manual, and theme customization
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/matplotlib.yaml b/plots/parallel-basic/metadata/matplotlib.yaml
index 6234c97de9..bf9ec93edf 100644
--- a/plots/parallel-basic/metadata/matplotlib.yaml
+++ b/plots/parallel-basic/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
     outside the plot area or in a less obstructed corner
   - Line width of 2 could be increased to 2.5-3 for better visibility at full 4800x2700
     resolution
+  image_description: 'The plot shows a parallel coordinates visualization with 4 vertical
+    axes representing Iris dataset features: Sepal Length (4.3-7.9 cm), Sepal Width
+    (2.0-4.4 cm), Petal Length (1.3-6.6 cm), and Petal Width (0.1-2.5 cm). Each observation
+    is represented as a line connecting normalized values (0-1) across all axes. Three
+    species are color-coded: Setosa (blue, #306998), Versicolor (yellow, #FFD43B),
+    and Virginica (green, #4CAF50). The title reads "parallel-basic · matplotlib ·
+    pyplots.ai". Lines use alpha=0.5 for transparency. A legend in the upper right
+    identifies the species. The plot clearly shows the clustering patterns - Setosa
+    (blue) has distinctly low petal length and width values, while Versicolor and
+    Virginica show more overlap but are still distinguishable.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 18pt, y-label at 20pt, tick labels
+          at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Lines are visible with alpha=0.5 and linewidth=2, appropriate for
+          150 observations, though could be slightly thicker for better clarity at
+          full resolution
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Green palette is colorblind-safe, good contrast between
+          categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills the space well with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units in parentheses (cm ranges shown)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is horizontal only (y-axis) which is appropriate, but legend
+          could be better positioned - currently in upper right which overlaps some
+          data lines
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Each dimension correctly mapped to a vertical axis, lines connect
+          values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization applied, color coding by category, transparency for
+          overlapping lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full normalized range (-0.05 to 1.05), x-axis labels
+          show original scale ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three species with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "parallel-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clustering patterns well, distinct separation visible especially
+          for Setosa, some overlap between Versicolor/Virginica demonstrates real-world
+          ambiguity
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses Iris dataset pattern which is a classic, well-known real-world
+          scenario for multivariate analysis
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for Iris measurements, though clipping bounds
+          are slightly narrower than actual Iris dataset ranges
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, pandas used - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses iterrows() which works but is not the most efficient pandas
+          pattern
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/plotly.yaml b/plots/parallel-basic/metadata/plotly.yaml
index 4a40fc4b48..1f923d17de 100644
--- a/plots/parallel-basic/metadata/plotly.yaml
+++ b/plots/parallel-basic/metadata/plotly.yaml
@@ -27,3 +27,185 @@ review:
     hints) that would showcase library strengths
   - Yellow-green color distinction in colorscale may be challenging for some colorblind
     users; consider using a more distinct three-color discrete palette
+  image_description: 'The plot displays a parallel coordinates visualization of Iris
+    flower measurements with four vertical axes: Sepal Length (cm), Sepal Width (cm),
+    Petal Length (cm), and Petal Width (cm). Lines connect data points across all
+    dimensions, colored by species using a blue-yellow-green gradient colorscale.
+    A vertical colorbar on the right indicates three species: Setosa (blue at bottom),
+    Versicolor (yellow in middle), and Virginica (green at top). The background is
+    white/clean. The three species clusters are clearly distinguishable - Setosa lines
+    (blue) show notably smaller petal measurements clustering at the lower range of
+    petal axes, Versicolor (yellow) occupies middle ranges, and Virginica (green)
+    shows higher values especially for petal length and width. The title "Iris Flower
+    Measurements · parallel-basic · plotly · pyplots.ai" is centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, labels at 22pt, ticks at 16pt - all clearly readable,
+          slightly small axis labels relative to canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, all labels and values are distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are visible with good color distinction, though with 150 lines
+          some overlap is inevitable; transparency could be slightly higher for better
+          clarity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-yellow-green colorscale is reasonably colorblind-friendly, though
+          yellow-green distinction may be challenging for some
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, plot fills canvas appropriately, colorbar positioned
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units (cm)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar serves as legend with species labels; no explicit grid but
+          clean white background works well for parallel coords
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four numeric dimensions correctly mapped to vertical axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: multiple dimensions, categorical coloring,
+          normalized display'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labels all three species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows `{context} · {spec-id} · {library} · pyplots.ai` format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows clustering patterns for all three species with clear separation
+          between groups, demonstrates the power of parallel coords for multivariate
+          analysis; minor: could show more overlap/ambiguity between versicolor/virginica
+          which exists in real Iris data'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, real-world botanical dataset perfect for
+          demonstrating parallel coordinates
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values match realistic Iris flower measurements (sepal length 4-8cm,
+          petal length 1-7cm, etc.)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data generation → plot creation
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs both plot.png and plot.html which is correct for Plotly,
+          but technically spec says plot.png (minor)
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Parcoords which is Plotly-specific, but doesn't leverage
+          interactivity features that make Plotly distinctive (hover tooltips, brushing/filtering
+          on axes, custom hover templates). HTML output is generated but no interactive
+          features are explicitly configured beyond defaults.
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/plotnine.yaml b/plots/parallel-basic/metadata/plotnine.yaml
index 486265ec64..03f490694f 100644
--- a/plots/parallel-basic/metadata/plotnine.yaml
+++ b/plots/parallel-basic/metadata/plotnine.yaml
@@ -23,3 +23,174 @@ review:
   - Uneven species distribution (20/20/10) could be balanced as 50 samples equally
     distributed
   - Legend title could use a more specific label than generic Species
+  image_description: 'The plot displays a parallel coordinates visualization with
+    four vertical axes representing Iris flower measurements: Sepal Length (cm), Sepal
+    Width (cm), Petal Length (cm), and Petal Width (cm). The Y-axis shows normalized
+    values from 0 to 1. Three species are color-coded: Setosa in steel blue (#306998),
+    Versicolor in golden yellow (#FFD43B), and Virginica in coral red (#E74C3C). Each
+    observation is represented by a line connecting its values across the four dimensions,
+    with small circular markers at each axis intersection. The plot clearly shows
+    species clustering - Setosa (blue) clusters low on petal measurements while having
+    varied sepal measurements, Versicolor (yellow) occupies the middle range, and
+    Virginica (red) shows the highest values for petal dimensions. The title "parallel-basic
+    · plotnine · pyplots.ai" appears at the top, and a legend is positioned on the
+    right side.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Lines and points visible with good alpha (0.6/0.8), some overlap
+          at convergence points but expected for parallel coordinates
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette (blue, yellow, red) with high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of 16:9 canvas, plot fills appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sepal Length (cm)" etc.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle, but minor grid is disabled; legend well placed but
+          could be closer
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Each dimension correctly mapped to vertical axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Normalization, color coding by category, transparency for overlapping
+          lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-1 normalized range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all three species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "parallel-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows clustering patterns, but unequal species distribution (20/20/10)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, real-world multivariate dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Actual Iris measurements, realistic biological values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Normalize → Transform → Plot → Save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (plotnine elements, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses grammar of graphics approach with ggplot + geom_line + geom_point
+          + aes mapping, but parallel coordinates is not a native plotnine feature
+  verdict: APPROVED
diff --git a/plots/parallel-basic/metadata/seaborn.yaml b/plots/parallel-basic/metadata/seaborn.yaml
index 2dfd0deeae..5969ccca74 100644
--- a/plots/parallel-basic/metadata/seaborn.yaml
+++ b/plots/parallel-basic/metadata/seaborn.yaml
@@ -25,3 +25,184 @@ review:
     data generation with fixed seed
   - Could leverage more seaborn-specific features rather than relying on lineplot
     as a workaround
+  image_description: 'The plot displays a parallel coordinates visualization using
+    the Iris dataset. Four vertical axes represent the normalized measurements: Sepal
+    Length (4.3-7.9 cm), Sepal Width (2.0-4.4 cm), Petal Length (1.0-6.9 cm), and
+    Petal Width (0.1-2.5 cm). Lines connect each observation across all dimensions,
+    colored by species: blue (#306998) for setosa, yellow (#FFD43B) for versicolor,
+    and green (#4CAF50) for virginica. The title follows the required format "parallel-basic
+    · seaborn · pyplots.ai". The legend is positioned in the upper right corner with
+    clear species labels. Lines have good transparency (alpha=0.5) allowing overlap
+    patterns to be visible. The y-axis shows "Normalized Value" from 0 to 1.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 18pt+, y-axis ticks at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels are well-spaced with
+          two-line format
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines well-visible with linewidth=2 and alpha=0.5, good balance for
+          150 observations. Minor deduction as some lines could be slightly more distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green palette is colorblind-friendly with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though slight margin imbalance on the right
+          side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (cm) shown in the x-axis tick labels
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with whitegrid style, but legend could be slightly
+          better positioned to avoid potential overlap with data lines in upper right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel coordinates implementation using seaborn lineplot
+          with units parameter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four dimensions correctly mapped to x-axis categories, normalized
+          values to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: normalization, color by category, transparency
+          for overlapping lines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis properly shows 0-1 normalized range with slight padding (-0.05
+          to 1.05)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species labels correctly match the data categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "parallel-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear cluster separation (setosa distinct from others), correlation
+          patterns visible between petal dimensions
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, real dataset. Minor deduction as the spec
+          suggested custom scenarios like health metrics or product features
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Original data ranges shown in axis labels, normalized values make
+          sense
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → normalization → reshape
+          → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses external URL for data without fixed seed. The iris dataset is
+          deterministic but loading from URL adds external dependency
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, pandas, and seaborn imported, all are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot with units parameter which is a valid approach,
+          but seaborn doesn't have a native parallel coordinates function. The implementation
+          is more of a matplotlib solution with seaborn theming. Could have leveraged
+          more seaborn-specific features.
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/altair.yaml b/plots/parallel-categories-basic/metadata/altair.yaml
index e0a93c9646..c9a161ba3b 100644
--- a/plots/parallel-categories-basic/metadata/altair.yaml
+++ b/plots/parallel-categories-basic/metadata/altair.yaml
@@ -26,3 +26,177 @@ review:
   - Some thinner flow ribbons are harder to trace across the full diagram
   - Count labels positioned to the right of boxes overlap with ribbon starting points
     on some boxes
+  image_description: 'The plot displays a parallel categories diagram with three vertical
+    dimensions: Channel (left), Category (middle), and Outcome (right). The Channel
+    dimension shows four colored bars: Email (purple, 31 count), Social (light blue,
+    35), Direct (dark blue, 66), and Search (yellow, 68). The Category column shows
+    grey bars for Sports, Electronics, Home, and Clothing. The Outcome column shows
+    grey bars for Browse, Abandon, and Purchase. Smooth bezier curve ribbons connect
+    categories across dimensions, colored by source channel. Column headers appear
+    in blue text above each dimension. A "Flow Colors" legend on the right maps colors
+    to channels. The title "parallel-categories-basic · altair · pyplots.ai" appears
+    at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 28pt, labels 18pt, headers 24pt. All readable, tick counts
+          slightly small but acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Ribbons visible with appropriate width proportional to count. Some
+          thin flows harder to trace.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good color choices: blue, yellow, purple palette is colorblind-friendly.'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot well-centered with balanced margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for parallel categories (no traditional axes), headers serve
+          this purpose.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend present and well-positioned, no grid needed.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories with ribbons connecting categorical dimensions.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three categorical dimensions correctly displayed with width-proportional
+          ribbons.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Ribbons show flow, width proportional to count. Color by first dimension
+          (Channel) as spec suggests. Missing: interactive highlighting noted in spec.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible and properly scaled.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Flow Colors legend accurately maps channel to color.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "{spec-id} · {library} · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multi-dimensional categorical flow well. 3 dimensions with
+          4, 4, and 3 categories each. Could show more variation in flow patterns.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer journey e-commerce scenario is excellent, comprehensible
+          real-world application.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 200 customers with realistic distribution. Counts are sensible.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear structure but complex bezier calculation loop adds
+          some complexity.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair 5.x API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of Altair's layered composition, mark_line for bezier
+          flows, mark_rect for boxes, mark_text for labels, mark_square for legend.
+          Excellent use of declarative grammar.
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/bokeh.yaml b/plots/parallel-categories-basic/metadata/bokeh.yaml
index 0b1f80e4a1..390292abf2 100644
--- a/plots/parallel-categories-basic/metadata/bokeh.yaml
+++ b/plots/parallel-categories-basic/metadata/bokeh.yaml
@@ -25,3 +25,176 @@ review:
   - Some thinner ribbons representing less common paths are harder to trace visually
   - Legend placement in upper right creates slight visual imbalance with the main
     chart
+  image_description: 'The plot displays a parallel categories diagram with three dimensions:
+    Channel (left), Category (middle), and Outcome (right). Each dimension is represented
+    by a dark gray vertical bar with category labels positioned beside them. Colored
+    ribbons with bezier curves connect categories across dimensions, showing the flow
+    of 500 simulated product journey observations. The color scheme uses blue (#306998)
+    for Online, yellow (#FFD43B) for Store, and green (#4DAF4A) for Mobile channels.
+    A legend in the upper right corner identifies these colors. Dimension titles ("Channel",
+    "Category", "Outcome") appear at the top. Category labels are clearly readable:
+    Mobile/Store/Online on the left, Home/Clothing/Electronics in the middle, and
+    Exchanged/Returned/Purchased on the right. The background is a subtle light gray
+    (#FAFAFA), and ribbons have 60% opacity to show overlapping flows.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, dimension labels at 36pt, category labels at 28pt,
+          legend at 24pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels positioned to avoid collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Ribbons well-sized with good alpha transparency; minor deduction
+          for some thin ribbons being less visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-friendly, avoids red-green
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, but legend placement creates slight asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for parallel categories (no traditional axes), dimension titles
+          serve this purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid appropriately hidden, manual legend well-placed
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories visualization with ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three categorical dimensions correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Width-proportional ribbons present, color by first dimension; minor:
+          no interactive highlighting (though HTML version available)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible, full range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies channel colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"parallel-categories-basic · bokeh · pyplots.ai" format correct'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows diverse flows, varying ribbon widths, crossings between categories;
+          could show more variation in outcome proportions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product purchase journey (Channel → Category → Outcome) is a realistic
+          e-commerce scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 500 observations with 3 dimensions × 3 categories each is reasonable;
+          probabilities create realistic patterns
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but `Label` could be consolidated
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Implements parallel categories from scratch using Bokeh primitives
+          (patches, ColumnDataSource, Labels); saves both PNG and HTML for interactivity
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/highcharts.yaml b/plots/parallel-categories-basic/metadata/highcharts.yaml
index 701199fb4b..0c05719a9a 100644
--- a/plots/parallel-categories-basic/metadata/highcharts.yaml
+++ b/plots/parallel-categories-basic/metadata/highcharts.yaml
@@ -27,3 +27,179 @@ review:
   - Red/green color combination for Survived/Died may be difficult for colorblind
     users; consider using blue/orange instead
   - The code complexity for node naming (suffixes like _M1, _AM1) could be simplified
+  image_description: The plot displays a Sankey/parallel categories diagram showing
+    Titanic passenger survival data flowing through four dimensions. The leftmost
+    column shows passenger classes (1st Class in blue, 2nd Class in purple, 3rd Class
+    in cyan). The second column shows Sex (Male in dark blue, Female in pink). The
+    third column shows Age Group (Adult in purple, Child in sage green). The rightmost
+    column shows Outcome (Survived in green, Died in red). Ribbons connect categories
+    proportionally by passenger count, with the largest flows showing 3rd Class male
+    adults who died. The title "Titanic Survival · parallel-categories-basic · highcharts
+    · pyplots.ai" appears at top with a subtitle explaining the flow. Dimension labels
+    (Class, Sex, Age Group, Outcome) appear at the bottom.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title is large and bold, node labels
+          are legible, dimension labels at bottom are prominent'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all node labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ribbon widths are proportional and clearly visible; even small flows
+          like children are distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses a diverse palette that is mostly colorblind-friendly, though
+          red/green for Survived/Died could be problematic for some viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; diagram fills the area well with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for parallel categories (no traditional axes), but dimension
+          labels serve this purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed; legend disabled since nodes are self-labeled, which
+          is appropriate
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories implementation using Sankey chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four categorical dimensions correctly mapped with proper flow structure
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Ribbon widths proportional to count, color by category, clear flow
+          visualization
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data flows shown, including small categories
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Node labels accurate; no separate legend needed as nodes are self-describing
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Titanic Survival · parallel-categories-basic ·
+          highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: multiple classes, both sexes, age groups, and
+          survival outcomes with varying proportions'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses classic Titanic dataset, a well-known and neutral educational
+          example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Passenger counts are historically accurate and sensible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no explicit seed needed; minor
+          deduction for complexity of node naming
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also creates intermediate plot_raw.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Sankey module, accessibility options, custom node positioning,
+          hover states, and animation. Could use more advanced features like drilldown.
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/letsplot.yaml b/plots/parallel-categories-basic/metadata/letsplot.yaml
index 1d3d447134..4dcb350476 100644
--- a/plots/parallel-categories-basic/metadata/letsplot.yaml
+++ b/plots/parallel-categories-basic/metadata/letsplot.yaml
@@ -24,3 +24,180 @@ review:
   weaknesses:
   - Category label text could be slightly larger (size 14 → 16) for optimal readability
     at 4800×2700
+  image_description: 'The plot displays a parallel categories diagram with 4 vertical
+    dimensions (Channel, Product, Size, Outcome) connected by smooth curved ribbons.
+    The ribbons are colored by acquisition channel: blue (#306998) for Online, green
+    (#27AE60) for Store, and yellow (#FFD43B) for Mobile. Each category node is rendered
+    as a dark blue-gray rectangle with labels showing the category name and count
+    in parentheses (e.g., "Online (270)", "Completed (579)"). Dimension headers appear
+    in bold at the top of each vertical axis. The legend at the bottom shows "Acquisition
+    Channel" with the three color mappings. The ribbons use 50% alpha transparency
+    allowing overlapping flows to be distinguished.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and dimension headers are clearly readable. Category labels
+          are readable but could be slightly larger for optimal viewing at full resolution
+          (size 14 vs recommended 16+)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlaps; labels positioned on alternating sides of nodes
+          to prevent collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ribbons are well-sized with appropriate alpha (0.5), nodes are distinct
+          dark rectangles
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, green, and yellow are colorblind-safe (not red-green dependent)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with plot occupying ~70% of space; bottom area
+          has some unused whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for this plot type (no traditional axes); dimension headers serve
+          this purpose
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No distracting grid (appropriate for this plot), legend well-placed
+          at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories with width-proportional ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four categorical dimensions correctly mapped with flow counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Ribbons proportional to count, colored by first dimension (Channel),
+          proper category ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories and flows visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Acquisition Channel" with accurate color
+          mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "parallel-categories-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple channels, products, sizes, and outcomes with varied
+          flow sizes; could show more extreme contrasts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: E-commerce customer journey is a perfect, neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible (45, 32, 18, etc.) though some flows are quite
+          similar in size
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Now uses flat structure without functions/classes (improved from
+          previous attempt)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded list), no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but path="." may cause issues
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_polygon, geom_rect, geom_text, theme_minimal)
+          appropriately, but this is a custom implementation rather than a native
+          lets-plot parallel categories geom
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/matplotlib.yaml b/plots/parallel-categories-basic/metadata/matplotlib.yaml
index 3f4b533898..81362ed157 100644
--- a/plots/parallel-categories-basic/metadata/matplotlib.yaml
+++ b/plots/parallel-categories-basic/metadata/matplotlib.yaml
@@ -26,3 +26,169 @@ review:
     be confused; recommend using more distinct colors like green for Mobile
   - The second-hop ribbon coloring logic uses mode() which may not accurately trace
     back the original channel for all flows
+  image_description: 'The plot displays a parallel categories diagram with three vertical
+    dimensions: Channel (Online, Store, Mobile), Category (Sports, Home, Clothing,
+    Electronics), and Outcome (Abandoned, Returned, Purchased). Dark navy blue rectangular
+    bars represent each category, with white bold text labels inside. Smooth bezier
+    curve ribbons connect categories between dimensions, showing the flow of 500 observations.
+    Ribbons are colored by source channel: blue for Online, yellow/gold for Store,
+    and a lighter blue for Mobile. The ribbons have appropriate transparency (alpha=0.6)
+    showing overlaps. The title "parallel-categories-basic · matplotlib · pyplots.ai"
+    appears at the top in dark navy. A legend in the bottom-right shows the Channel
+    color coding. The plot uses a white background with no axes or grid.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, dimension labels at 20pt, category labels at 14pt
+          bold white - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well-positioned within bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Ribbons clearly visible with good alpha, bars well-sized, though
+          some ribbon flows are thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/blue scheme is mostly distinguishable, though Online
+          and Mobile blues are similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend appropriately placed
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed with good formatting, no grid needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories plot with ribbons connecting categorical
+          dimensions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three categorical dimensions correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Width-proportional ribbons, color by source dimension, category bars
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible and properly scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Channel colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: parallel-categories-basic · matplotlib · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows flow patterns across three dimensions, though random data doesn't
+          demonstrate meaningful correlations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer purchase flow scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 samples with realistic distribution probabilities
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Script structure is linear but code is more complex than typical
+          KISS style due to manual path creation
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of matplotlib.path.Path for bezier curves and mpatches for
+          custom shapes, but this is a workaround since matplotlib lacks native parallel
+          categories support
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/plotly.yaml b/plots/parallel-categories-basic/metadata/plotly.yaml
index b26be3116c..baa1af0a3f 100644
--- a/plots/parallel-categories-basic/metadata/plotly.yaml
+++ b/plots/parallel-categories-basic/metadata/plotly.yaml
@@ -22,3 +22,173 @@ review:
   weaknesses:
   - The Did Not Survive label on the right side is slightly cut off - consider adjusting
     margins or abbreviating
+  image_description: 'The plot displays a parallel categories diagram visualizing
+    Titanic passenger data across four categorical dimensions. From left to right:
+    **Passenger Class** (First, Second, Third), **Sex** (Female, Male), **Embarked**
+    (Cherbourg, Queenstown, Southampton), and **Outcome** (Survived, Did Not Survive).
+    Ribbons flow between categories using smooth hspline curves. The color scheme
+    uses **Python Blue (#306998)** for passengers who did not survive and **bright
+    yellow (#FFD43B)** for survivors. The title "Titanic Passengers · parallel-categories-basic
+    · plotly · pyplots.ai" is centered at the top. A helpful annotation at the bottom
+    explains the color encoding. The layout is well-balanced with adequate margins,
+    though the "Did Not Survive" label on the far right is slightly truncated.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text is clearly readable with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ribbons are well-sized and flow is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but "Did Not Survive" text is slightly cut off on
+          right edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive dimension labels (Passenger Class, Sex, Embarked, Outcome)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No traditional legend needed for this plot type; annotation serves
+          as color guide
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All categorical dimensions correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Ribbons show flow, width proportional to count, colored by outcome
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible and properly ordered
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color annotation clearly explains blue=not survived, yellow=survived
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{context} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: multiple dimensions, varied category sizes, clear
+          survival patterns'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Titanic dataset is a classic, neutral, well-understood real-world
+          dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Real passenger data with meaningful category distributions
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses seaborn dataset which is deterministic, but no explicit seed
+          comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (plotly.graph_objects, seaborn for data)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Parcats with hspline curves, hoveron="color", hoverinfo, arrangement
+          options
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/plotnine.yaml b/plots/parallel-categories-basic/metadata/plotnine.yaml
index 7cafe13470..7b62fee803 100644
--- a/plots/parallel-categories-basic/metadata/plotnine.yaml
+++ b/plots/parallel-categories-basic/metadata/plotnine.yaml
@@ -25,3 +25,177 @@ review:
   - Middle dimension category labels positioned below nodes are slightly smaller (9pt)
     and could be harder to read
   - Some imports may be unused (coord_cartesian could be replaced with theme settings)
+  image_description: 'The plot displays a parallel categories diagram showing customer
+    journey data across four categorical dimensions: Channel (Online, Store, Mobile),
+    Product (Electronics, Clothing, Home), Customer (Returning, New), and Outcome
+    (Purchased, Abandoned). The ribbons connecting categories are colored by outcome
+    - yellow (#FFD43B) for Purchased and blue (#306998) for Abandoned, with 50% transparency.
+    Gray rectangular nodes represent each category with white count labels inside
+    (e.g., 1087 for Online, 1126 for Electronics). Dimension labels appear at the
+    top (Channel, Product, Customer, Outcome). Category labels are positioned: left
+    side for first dimension, right side for last dimension, and below nodes for middle
+    dimensions. The layout uses 16:9 aspect ratio with the title "parallel-categories-basic
+    · plotnine · pyplots.ai" at top center. The legend on the right shows Outcome
+    colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt is clear, dimension labels at 14pt readable, category
+          labels at 9-10pt readable but slightly small for middle dimensions
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ribbons clearly visible with appropriate alpha (0.5), nodes well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow vs blue is colorblind-safe (not red-green), good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (but appropriate for this plot type that uses dimension
+          labels instead)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend well-placed on right
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories with width-proportional ribbons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four categorical dimensions correctly mapped with flow connections
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has ribbons showing flow, coloring by outcome, category labels.
+          Minor: could show more visual emphasis on ribbon width proportionality'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories and dimensions displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Purchased/Abandoned colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "parallel-categories-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple dimensions, varied path flows, both outcomes. Good
+          variation in ribbon widths showing different path frequencies
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer journey through e-commerce is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Counts are reasonable (12-187 per path), totals make sense for customer
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: coord_cartesian imported but could use xlim/ylim in theme instead
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses plotnine grammar of graphics
+        score: 3
+        max: 5
+        passed: true
+        comment: Effectively uses geom_polygon for ribbons, geom_rect for nodes, geom_text
+          and annotate for labels. This is a custom implementation since plotnine
+          doesn't have native parallel categories support, demonstrating creative
+          use of basic geoms.
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/pygal.yaml b/plots/parallel-categories-basic/metadata/pygal.yaml
index a2fe924f8d..3cea2c7806 100644
--- a/plots/parallel-categories-basic/metadata/pygal.yaml
+++ b/plots/parallel-categories-basic/metadata/pygal.yaml
@@ -25,3 +25,186 @@ review:
     but could overlap with ribbons in denser visualizations
   - Cancelled outcome label at bottom right is quite small and harder to read than
     other labels
+  image_description: 'The plot displays a parallel categories diagram with four vertical
+    dimensions: Category, Channel, Payment, and Outcome. The Category dimension on
+    the left shows four stacked colored bars (Electronics in blue, Clothing in yellow,
+    Home & Garden in teal, Sports in coral). Curved ribbons flow from each category
+    through the middle dimensions (Channel showing Online/Store/Mobile App; Payment
+    showing Credit Card/Debit Card/Digital Wallet) to the Outcome dimension on the
+    right (Completed/Returned/Cancelled). The ribbons are semi-transparent and colored
+    by their source category, allowing visual tracking of customer purchase journeys.
+    The title "parallel-categories-basic · pygal · pyplots.ai" appears at the top
+    center. A legend at the bottom shows the four category colors, with an italic
+    subtitle describing the visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title is large and clear, dimension headers bold, category labels
+          readable. Minor: "Cancelled" label at bottom right is smaller than other
+          outcome labels'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels positioned outside bars, ribbons
+          flow cleanly
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Ribbons clearly visible with good opacity (0.4), bars well-sized.
+          Ribbons appropriately show flow proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Colorblind-safe palette: blue, yellow, teal, coral - all distinguishable'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills most of the space. Small margin imbalance
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for parallel categories but dimension headers are descriptive
+          (Category, Channel, Payment, Outcome)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is present and well-placed, no grid needed for this chart
+          type
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories visualization with ribbons connecting
+          dimensions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Four categorical dimensions correctly mapped: Category → Channel
+          → Payment → Outcome'
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Ribbons proportional to counts, colored by first dimension, multiple
+          dimensions shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible in each dimension
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows category colors but middle dimensions use different
+          colors not explained
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "parallel-categories-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows flows across 4 dimensions with varying proportions, includes
+          completed/returned/cancelled outcomes. Could show more variation in return
+          rates
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real e-commerce customer journey scenario with plausible product
+          categories and channels
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Transaction counts are realistic (hundreds per path), proportions
+          make sense
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code builds SVG manually which is complex but necessary for pygal
+          parallel categories. Uses procedural structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: cairosvg, numpy, pygal, Style'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.svg, plot.html)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creatively uses pygal's XY chart as base and builds custom SVG elements.
+          Shows deep understanding of pygal's SVG rendering but relies heavily on
+          manual SVG construction
+  verdict: APPROVED
diff --git a/plots/parallel-categories-basic/metadata/seaborn.yaml b/plots/parallel-categories-basic/metadata/seaborn.yaml
index 0916755a3c..71e81f4210 100644
--- a/plots/parallel-categories-basic/metadata/seaborn.yaml
+++ b/plots/parallel-categories-basic/metadata/seaborn.yaml
@@ -24,3 +24,184 @@ review:
     LF score)
   - The survival probability calculation loop adds code complexity that could be simplified
     with vectorized operations
+  image_description: 'The plot displays a parallel categories diagram with 5 categorical
+    dimensions: Class (First, Second, Third), Sex (Female, Male), Age Group (Child,
+    Adult, Senior), Embarked (Southampton, Cherbourg, Queenstown), and Outcome (Survived,
+    Lost). Each dimension is represented by vertical dark gray bars with white labels
+    placed outside. The ribbons connecting categories are colored using a colorblind-safe
+    palette: blue for First Class, orange/gold for Second Class, and teal/turquoise
+    for Third Class. The ribbons flow from left to right showing how observations
+    move through categories. The title "parallel-categories-basic · seaborn · pyplots.ai"
+    appears at the top in large bold text. A legend at the bottom shows the three
+    class colors. The dimension headers are displayed in teal color above each vertical
+    bar. The overall layout is clean with good use of whitespace and no clipping.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and dimension headers are large and clear; category labels
+          are readable but some could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Most labels avoid overlap; slight congestion between some middle
+          dimension labels
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ribbons are well-sized with appropriate alpha (0.55), flows are clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind palette, excellent contrast between the
+          three class colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, plot fills most of the area; minor: legend
+          could be slightly closer to the plot'
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for parallel categories (no traditional axes), but dimension
+          headers serve this purpose well
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean whitegrid style, legend well-positioned at bottom with appropriate
+          transparency
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct parallel categories implementation with ribbons showing flows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All 5 dimensions correctly mapped with proper category ordering
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Width-proportional ribbons present; coloring by first dimension (Class);
+          missing interactive highlighting (not available in static seaborn)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible and proportionally represented
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows First/Second/Third Class colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "parallel-categories-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows flow across all 5 dimensions with good variation; survival
+          probability patterns visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Titanic-style dataset is a classic, well-understood example for parallel
+          categories
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 500 samples with realistic class distribution; some categories (e.g.,
+          Queenstown) are quite small
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear flow but contains a loop for survival probability calculation
+          which adds complexity
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, matplotlib.patches, matplotlib.path,
+          seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn/matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with correct settings
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style, sns.set_context, and sns.color_palette effectively;
+          however, the core visualization is built with matplotlib patches rather
+          than native seaborn plots (seaborn doesn't have native parallel categories
+          support)
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/altair.yaml b/plots/parliament-basic/metadata/altair.yaml
index 7871933853..ae9742855b 100644
--- a/plots/parliament-basic/metadata/altair.yaml
+++ b/plots/parliament-basic/metadata/altair.yaml
@@ -23,3 +23,170 @@ review:
   - Large gap between the parliament chart and the legend creates visual disconnection
   - Uses political party theme which content policy suggests avoiding for neutral
     topics
+  image_description: 'The plot displays a semicircular parliament seat chart with
+    300 seats arranged in 5 concentric arcs. Seats are rendered as circular dots colored
+    by party: Progressive (blue, 95 seats) on the left, followed by Conservative (yellow,
+    82 seats), Green (green, 45 seats), Liberal (orange, 38 seats), Social Dem. (purple,
+    28 seats), and Independent (brown, 12 seats) on the right. The title "parliament-basic
+    · altair · pyplots.ai" appears at the top center. A horizontal legend at the bottom
+    shows all parties with seat counts in parentheses, arranged in 3 columns. The
+    semicircle fills the canvas well from left to right.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 28pt, legend labels 16pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Seat markers are size 250 with 0.9 opacity, excellent visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Six distinct colors, no red-green conflicts, all distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good semicircle placement but legend is far from chart with significant
+          whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for parliament chart, axes correctly hidden
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-designed but positioned too far from chart with excessive
+          gap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly arranged in arcs, parties assigned left-to-right
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual seat dots, party colors, seat counts in legend all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 300 seats visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Party names with seat counts in parentheses
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "parliament-basic · altair · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 6 parties with varying seat counts, demonstrates arc arrangement
+      - id: DQ-02
+        name: Realistic Context
+        score: 4
+        max: 7
+        passed: false
+        comment: Party names are fictional, which is acceptable, but the scenario
+          is still explicitly political (elections, parties), which the content policy
+          advises against
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 300 seats with realistic distribution
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair 5.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative encoding, custom color scales,
+          detailed legend configuration, tooltips, and mark_circle
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/bokeh.yaml b/plots/parliament-basic/metadata/bokeh.yaml
index 874ad40c56..6bd41a9722 100644
--- a/plots/parliament-basic/metadata/bokeh.yaml
+++ b/plots/parliament-basic/metadata/bokeh.yaml
@@ -1,6 +1,3 @@
-# Per-library metadata for bokeh implementation of parliament-basic
-# Auto-generated by impl-generate.yml
-
 library: bokeh
 specification_id: parliament-basic
 created: '2025-12-30T00:03:30Z'
@@ -15,5 +12,195 @@ preview_thumb: https://storage.googleapis.com/pyplots-images/plots/parliament-ba
 preview_html: https://storage.googleapis.com/pyplots-images/plots/parliament-basic/bokeh/plot.html
 quality_score: null
 review:
-  strengths: []
-  weaknesses: []
+  strengths:
+  - Excellent semicircular parliament visualization with proper concentric arc arrangement
+  - Clean center annotation showing total seat count (300) with "seats" label
+  - Majority threshold annotation (151 seats) provides useful political context
+  - Legend displays party names with seat counts in parentheses as required by spec
+  - Good use of Bokeh's Label model for custom annotations
+  - Colors are distinct and party sizes are clearly differentiated by arc positioning
+  - Properly generates both PNG and HTML output for Bokeh
+  weaknesses:
+  - '**Implementation Python file is missing from the PR** - only metadata/bokeh.yaml
+    was committed; the actual `plots/parliament-basic/implementations/bokeh.py` file
+    needs to be included'
+  - Seat dots could be slightly larger (current size=20 may be small at 4800x2700
+    resolution)
+  - Red (#E74C3C) and Orange (#E67E22) colors are somewhat close for colorblind accessibility
+  - Political party names (Progressive, Conservative, etc.) could potentially be replaced
+    with more neutral terminology (e.g., "Party A", "Party B") to avoid any political
+    connotations
+  image_description: |-
+    The plot displays a semicircular parliament seat chart with 300 total seats arranged in concentric arcs. Seven parties are represented with distinct colors:
+    - **Progressive Party (85 seats)** - Blue (#306998) - positioned on the left side
+    - **Conservative Union (72 seats)** - Red (#E74C3C) - center-left area
+    - **Green Alliance (45 seats)** - Green (#27AE60) - center area
+    - **Liberal Democrats (38 seats)** - Yellow (#FFD43B) - center-right area
+    - **Social Democrats (32 seats)** - Purple (#9B59B6) - right-center area
+    - **Regional Party (18 seats)** - Orange (#E67E22) - right side
+    - **Independents (10 seats)** - Gray (#95A5A6) - far right side
+
+    The title reads "parliament-basic · bokeh · pyplots.ai" at the top. A large "300" with "seats" label appears in the center of the semicircle. Below the chart, "Majority threshold: 151 seats" is displayed. The legend is positioned on the right side showing all parties with their seat counts. Individual seats are rendered as circular dots arranged in multiple concentric arcs.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend text are clearly readable; center "300" is prominent
+          and bold
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; seats are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Seat dots are visible but could be slightly larger for better distinction
+          at full resolution
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color variety, though red/orange proximity may be challenging
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; semicircle is well-centered with adequate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type; appropriately hidden axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right; no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly distributed by party across arcs
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has legend with seat counts, majority threshold annotation; missing
+          optional left-to-right political spectrum ordering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 300 seats visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Party names and seat counts match the data
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but could benefit from slightly larger title
+          font
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple parties with varying seat counts; demonstrates concentric
+          arc arrangement
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Uses generic party names (Progressive, Conservative, etc.) which
+          is acceptable; however, spec notes this could be controversial territory
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 300 total seats is realistic for a parliament; seat counts are proportionally
+          sensible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: '**CRITICAL: Implementation Python file is missing from PR**'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: false
+        comment: Cannot verify due to missing file, but output appears deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Cannot verify; implementation file is missing from the PR
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Bokeh 3.8.1 used per metadata
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: plot.png and plot.html generated correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Bokeh's ColumnDataSource, Label annotations, Legend with styling,
+          and figure customization
+  verdict: REJECTED
diff --git a/plots/parliament-basic/metadata/highcharts.yaml b/plots/parliament-basic/metadata/highcharts.yaml
index 011d06cbd1..6d8dd8551c 100644
--- a/plots/parliament-basic/metadata/highcharts.yaml
+++ b/plots/parliament-basic/metadata/highcharts.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Uses a helper function calculate_seat_positions instead of inline code (violates
     KISS structure requirement)
+  image_description: 'The plot displays a semicircular parliament seat chart with
+    280 total seats arranged in 5 concentric arcs. Six parties are represented with
+    distinct colors: Progressive Alliance (blue, 85 seats) on the left, Unity Party
+    (yellow, 72 seats), Liberty Coalition (purple, 48 seats), Green Forum (green,
+    35 seats), Social Democrats (cyan, 28 seats), and Reform Movement (brown, 12 seats)
+    on the right. Each seat is rendered as an individual circular dot with a subtle
+    dark border. The title "parliament-basic · highcharts · pyplots.ai" appears at
+    the top center with a subtitle showing "Total Seats: 280". A vertical legend on
+    the right side lists all parties with their seat counts. The semicircle is well-proportioned
+    on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 56px, subtitle at 36px, legend at 32px - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Seats are visible with radius 16 and dark borders, though slightly
+          small for outer rows
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, green, cyan,
+          brown)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, chart fills canvas well, legend positioned cleanly
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for parliament chart (no axes needed), but the criterion expects labels
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well placed with seat counts, no grid (appropriate for this
+          chart type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly distributed across parties in arcs
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual seats as dots, party colors, legend with seat counts
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 280 seats visible across all parties
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows party names with accurate seat counts
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "parliament-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 6 parties with varied seat counts, good distribution from large
+          (85) to small (12)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Neutral fictional legislature context, avoids real politics (good!)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 280 total seats is realistic for a parliament
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses a function `calculate_seat_positions` which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses ScatterSeries with custom markers, proper Highcharts options
+          structure, generates interactive HTML version
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/letsplot.yaml b/plots/parliament-basic/metadata/letsplot.yaml
index e3274dcd52..0fb34dfacd 100644
--- a/plots/parliament-basic/metadata/letsplot.yaml
+++ b/plots/parliament-basic/metadata/letsplot.yaml
@@ -24,3 +24,169 @@ review:
   weaknesses:
   - Title uses lets-plot with hyphen instead of letsplot (one word)
   - Seats are distributed by row rather than strictly left-to-right across the semicircle
+  image_description: 'The plot displays a semicircular parliament seat chart with
+    5 concentric arcs of dots representing 400 total seats. The seats are colored
+    by 6 divisions: Finance Committee (blue, 85 seats), Technology Board (yellow,
+    72 seats), Operations Division (orange, 58 seats), Research Council (light blue,
+    35 seats), Marketing Team (red, 95 seats), and Legal Advisory (purple, 55 seats).
+    The title "parliament-basic · lets-plot · pyplots.ai" is centered at the top.
+    A legend on the right shows each division with its seat count. A horizontal baseline/majority
+    line is drawn at y=0, and annotation text below reads "Total: 400 seats | Majority:
+    201". The chart uses a white background with theme_void styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend text clearly readable, good font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots are well-sized with good spacing; outer rows could be slightly
+          larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with distinct hues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced layout with legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for this plot type (theme_void removes axes appropriately)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed with seat counts, no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly mapped to dot positions
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has individual seats as dots, legend with counts, majority threshold;
+          seats not strictly left-to-right by political spectrum (spec note)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 400 seats displayed correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correct with seat counts
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but says "lets-plot" instead of "letsplot"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple groups with varying seat counts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses neutral board/committee scenario instead of political parties
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 400 total seats is realistic for a legislative body
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data but no random seed for future variations
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses lets-plot grammar of graphics
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot, geom_point, theme_void, coord_fixed, but could
+          leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/matplotlib.yaml b/plots/parliament-basic/metadata/matplotlib.yaml
index e6115c8fea..d7516c139f 100644
--- a/plots/parliament-basic/metadata/matplotlib.yaml
+++ b/plots/parliament-basic/metadata/matplotlib.yaml
@@ -27,3 +27,173 @@ review:
     than completely neutral names like colors or regions
   - Could use matplotlib patches/collections for more efficient rendering of many
     similar shapes
+  image_description: 'The plot displays a semicircular parliament seat chart with
+    400 seats arranged in 8 concentric arcs. Five parties are represented using a
+    colorblind-safe palette: Progressive Alliance (145 seats, orange/gold) on the
+    far left, Liberty Union (110 seats, blue), Green Future (45 seats, green), Conservative
+    Party (80 seats, red-orange), and Reform Movement (20 seats, pink) on the far
+    right. Seats are rendered as individual dots with white edge borders. The title
+    "parliament-basic · matplotlib · pyplots.ai" appears at the top in bold. Below
+    the semicircle, italic text displays "Majority threshold: 201 seats". A 3-column
+    legend at the bottom shows party names with seat counts. The chart has a clean
+    white background with no axes visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, legend at 14pt, all text clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean spacing between seats and text
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Seat dots sized well (s=160), white edges provide separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (#E69F00, #56B4E9, #009E73, #D55E00,
+          #CC79A7)'
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some empty space at bottom; legend placement
+          could be closer
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (axes hidden appropriately)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is functional but ncol=3 creates uneven rows; could be better
+          organized
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly arranged left-to-right by party
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has individual seats as dots, party colors, legend with seat counts,
+          majority threshold annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 400 seats visible and accounted for
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Party names and seat counts displayed correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: parliament-basic · matplotlib · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variety: large party (145), medium (110, 80), small (45, 20)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 4
+        max: 7
+        passed: false
+        comment: Uses a fictional parliament with invented party names, but "Progressive"/"Conservative"
+          still suggest a political spectrum (-3)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 400 total seats is realistic for a medium-sized parliament
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → layout → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed as data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct settings
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses scatter with edgecolors, custom positioning, but no advanced
+          matplotlib features like patches or collections
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/plotly.yaml b/plots/parliament-basic/metadata/plotly.yaml
index 91c2a3c450..ed74a30412 100644
--- a/plots/parliament-basic/metadata/plotly.yaml
+++ b/plots/parliament-basic/metadata/plotly.yaml
@@ -25,3 +25,180 @@ review:
   weaknesses:
   - The majority threshold dashed line is quite faint (rgba 0.3 alpha); could be slightly
     more visible while remaining subtle
+  image_description: 'The plot displays a semicircular parliament seat chart with
+    420 total seats arranged in 7 concentric arcs. Seats are rendered as circular
+    dots, colored by party: Progressive Alliance (blue, 145 seats) on the left, followed
+    by Civic Union (yellow, 118), Green Future (green, 52), Liberty Party (purple,
+    48), Reform Coalition (orange, 35), and Independent Group (pink, 22) moving toward
+    the right. A dashed gray arc indicates the majority threshold (211 seats). The
+    title "parliament-basic · plotly · pyplots.ai" appears centered at the top in
+    dark gray. The total seat count "420 seats" is displayed at the center bottom
+    of the semicircle. A horizontal legend at the bottom shows all parties with their
+    seat counts, plus the majority threshold.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, legend, and annotations are all clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; seats are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized with white borders for distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinguishable and colorblind-friendly (blue, yellow,
+          green, purple, orange, pink)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas; semicircle is well-proportioned with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for this plot type (no traditional axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed horizontally; majority line is subtle but could
+          be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly arranged left-to-right by party
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: semicircular arcs, individual seats as
+          dots, party colors, legend with seat counts, majority threshold line'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 420 seats visible and accounted for
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows party names with accurate seat counts
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "parliament-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple parties with varying seat counts, demonstrating parliament
+          composition visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Uses fictional, neutral party names, which is appropriate, but
+          parliament charts are inherently political content. However, using neutral
+          names mitigates this concern adequately.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 420 total seats is realistic for a parliament; party distributions
+          are plausible
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → calculations → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random seed set, though data is deterministic (hardcoded). However,
+          the seat distribution algorithm could benefit from an explicit seed for any
+          numpy operations.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Plotly: interactive hover templates, go.Scatter
+          for custom markers, annotations, clean layout configuration, HTML export
+          for interactivity'
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/plotnine.yaml b/plots/parliament-basic/metadata/plotnine.yaml
index 6d07df7d5e..81ea4993d3 100644
--- a/plots/parliament-basic/metadata/plotnine.yaml
+++ b/plots/parliament-basic/metadata/plotnine.yaml
@@ -22,3 +22,177 @@ review:
   weaknesses:
   - Seat markers could be slightly larger for better visibility at full resolution
   - Does not include optional majority threshold line mentioned in spec
+  image_description: 'The plot displays a semicircular parliament seat chart with
+    320 total seats arranged in 8 concentric arcs. Individual seats are rendered as
+    colored dots. From left to right, the parties are: Progressive Alliance (blue
+    #306998, 85 seats), Center Coalition (yellow #FFD43B, 72 seats), Conservative
+    Union (teal #4ECDC4, 68 seats), Green Future (green #2ECC71, 42 seats), Liberal
+    Democrats (purple #9B59B6, 35 seats), and Independent Group (gray #95A5A6, 18
+    seats). The legend is positioned at the bottom in a 2-row horizontal layout, showing
+    party names with seat counts in parentheses. The title "parliament-basic · plotnine
+    · pyplots.ai" is centered at the top in bold. The visualization uses theme_void
+    with a clean white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (26pt), legend text is clear (13pt), all text perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; seats are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Seats are well-sized (size=5) with good alpha (0.95); could be slightly
+          larger for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinguishable and avoid pure red-green conflicts; uses
+          blue, yellow, teal, green, purple, gray palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good semicircular layout filling canvas well; slight asymmetry in
+          vertical centering
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for this plot type (theme_void removes axes) - no deduction appropriate
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-positioned at bottom, horizontal with 2 rows, includes
+          seat counts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart with dots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Seats correctly arranged in arcs from left to right by party
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual seats as dots, party colors, legend with seat counts
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 320 seats visible across all arcs
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows all 6 parties with correct names and seat counts
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "parliament-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple parties with varying sizes; missing majority threshold
+          line (optional per spec)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Uses fictional neutral party names (good), but parliament theme could
+          be considered edge case per content policy
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 320 total seats is realistic for a parliament; seat distribution
+          is plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements used)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine grammar of graphics well (ggplot, geom_point, theme_void,
+          scale_color_manual, guide_legend); coord_fixed for aspect ratio; could leverage
+          more advanced features
+  verdict: APPROVED
diff --git a/plots/parliament-basic/metadata/seaborn.yaml b/plots/parliament-basic/metadata/seaborn.yaml
index ac5147ead5..4073dd60b4 100644
--- a/plots/parliament-basic/metadata/seaborn.yaml
+++ b/plots/parliament-basic/metadata/seaborn.yaml
@@ -23,3 +23,172 @@ review:
   weaknesses:
   - Layout has some empty space below the baseline that could be better utilized
   - Title uses regular dots which work but could be improved
+  image_description: 'The plot displays a semicircular parliament-style seat chart
+    with 5 concentric arcs. The title "parliament-basic · seaborn · pyplots.ai" appears
+    at the top in bold. A legend in the upper-left corner shows 6 departments: Engineering
+    (85 seats, blue), Operations (72 seats, orange), Finance (45 seats, green), Marketing
+    (38 seats, red-orange), Research (22 seats, pink), and Legal (18 seats, tan). Each
+    seat is represented as a circular dot with white edge borders. A dashed gray horizontal
+    line marks the baseline with "Majority: 141 seats" annotation on the right. At
+    the bottom, "Total: 280 seats" is displayed. The layout fills the canvas well
+    with balanced whitespace.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, legend text at 14pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Seats well-sized (s=350) with white edges for separation, minor:
+          outer rows slightly crowded'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall, but bottom portion has significant empty space below
+          the baseline
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for this plot type (axis is hidden)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, includes seat counts, no distracting grid
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct semicircular parliament seat chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Groups correctly mapped to seat positions left-to-right
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual seats as dots, party colors, legend with counts, majority
+          threshold
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 280 seats visible across all rows
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Accurate seat counts for all 6 groups
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but uses middle dots instead of proper interpuncts
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple groups with varying sizes, good distribution across
+          arcs
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Corporate board composition is neutral and plausible but somewhat
+          generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 280 total seats with realistic department proportions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot with hue for grouping, sns.set_style, sns.set_context,
+          colorblind palette
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/altair.yaml b/plots/pdp-basic/metadata/altair.yaml
index 7ad9a13e7f..beaa98b856 100644
--- a/plots/pdp-basic/metadata/altair.yaml
+++ b/plots/pdp-basic/metadata/altair.yaml
@@ -26,3 +26,179 @@ review:
   - Rug plot ticks are somewhat thin and could be more prominent
   - Could add interactive selection or zoom capabilities to better leverage Altair
     strengths
+  image_description: The plot displays a partial dependence plot for the "Temperature"
+    feature. A dark blue line (#306998) shows the average predicted outcome as temperature
+    varies across its standardized range (-2.5 to 2.5). The line is surrounded by
+    a light blue confidence band (95% CI from bootstrap resampling). A rug plot at
+    the bottom shows tick marks representing the distribution of training data values.
+    The curve demonstrates a clear positive monotonic relationship - as temperature
+    increases, the partial dependence (predicted outcome) increases from approximately
+    -35 to +30. The title follows the required format "pdp-basic · altair · pyplots.ai"
+    at the top center. Grid lines are subtle with dashed styling. Font sizes appear
+    appropriately scaled.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Line and confidence band clearly visible, though rug plot ticks are
+          somewhat thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) used throughout, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (standardized units)" and "Partial Dependence (predicted
+          outcome)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and dashed (good), but no legend present for confidence
+          band
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct partial dependence plot with continuous line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on X-axis, partial dependence on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has PDP line, confidence interval band, and rug plot as noted in
+          spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single feature PDP (data types are labeled on
+          axes)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "pdp-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full range of feature values, includes uncertainty visualization
+          via bootstrap CI
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Uses "Temperature" as feature which is plausible, though the "standardized
+          units" context is somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standardized values (-2 to 2) appropriate for normalized features,
+          PD values in sensible range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data generation → model training → PDP
+          computation → plotting → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for model
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas, sklearn components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current sklearn partial_dependence API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layered composition (alt.layer), mark_area for CI band,
+          mark_tick for rug, tooltips. Good declarative approach but no interactive
+          selection or linked views.
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/bokeh.yaml b/plots/pdp-basic/metadata/bokeh.yaml
index 3e6a509f57..627a299516 100644
--- a/plots/pdp-basic/metadata/bokeh.yaml
+++ b/plots/pdp-basic/metadata/bokeh.yaml
@@ -24,3 +24,179 @@ review:
     be safer
   - Axis labels lack units (though partial dependence is unitless, could add context
     like standardized units)
+  image_description: 'The plot displays a Partial Dependence Plot with a blue line
+    showing the average partial dependence (centered at zero) across Feature X₀ values
+    ranging from 0 to 1. A light blue shaded band represents the 80% confidence interval
+    around the main line. The curve shows a characteristic sigmoid-like pattern: starting
+    low around -4 to -8 at low feature values, rising steeply through the middle range,
+    and plateauing around +1.5 at higher values. A yellow rug plot at the bottom shows
+    the distribution of training data points. The title "pdp-basic · bokeh · pyplots.ai"
+    appears at the top left. A dashed horizontal reference line at y=0 helps interpret
+    relative effects. The legend in the bottom right shows "80% CI", "Average PD",
+    and "Data Distribution". The background is a light gray (#fafafa) with dashed
+    grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriately scaled font sizes for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width and confidence band are well-sized; rug plot marks could
+          be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Feature X₀ Value", "Partial Dependence (centered)")
+          but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend placement in bottom-right corner is partially obscured/cramped
+          and could overlap with data in other scenarios
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct PDP line plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on X-axis, partial dependence on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes confidence band, rug plot for data distribution, and centering
+          at zero as noted in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "pdp-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows non-linear relationship well; could benefit from showing a
+          feature with different relationship type
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Uses sklearn''s Friedman #1 dataset with GradientBoostingRegressor
+          - a real ML interpretability scenario'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for a regression problem, though feature range
+          [0,1] is somewhat narrow
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current sklearn and bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Band for confidence interval and Span for reference line,
+          ColumnDataSource for data management; could leverage more Bokeh-specific
+          features like hover tooltips
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/highcharts.yaml b/plots/pdp-basic/metadata/highcharts.yaml
index fd0bcbf18a..c660419e28 100644
--- a/plots/pdp-basic/metadata/highcharts.yaml
+++ b/plots/pdp-basic/metadata/highcharts.yaml
@@ -26,3 +26,179 @@ review:
   - Rug plot lines could be slightly thicker or more prominent at the bottom of the
     plot rather than spanning full height
   - Could add more descriptive tooltip content showing exact values
+  image_description: The plot displays a Partial Dependence Plot with a blue line
+    (#306998) showing the relationship between Feature 0 Value (x-axis, ranging from
+    approximately -1.5 to 1.5) and Partial Dependence centered (y-axis, ranging from
+    approximately -14 to 17). A light blue shaded area represents the 90% Confidence
+    Interval around the main PDP line. Vertical blue lines along the x-axis serve
+    as a rug plot showing the distribution of training data values. The title "pdp-basic
+    · highcharts · pyplots.ai" is at the top with a subtitle "Partial Dependence of
+    Feature 0 on Model Predictions". A dashed horizontal line at y=0 provides a reference.
+    The legend is positioned in the top-right corner. The overall layout is clean
+    with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is clearly visible with good thickness; confidence band is appropriately
+          transparent; rug plot lines are visible but could be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue color scheme throughout, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout, though the plot could use slightly more of the
+          vertical canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis labeled "Partial Dependence (centered)" and X-axis labeled
+          "Feature 0 Value" - descriptive labels
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but the legend is positioned far from the data in
+          the upper right corner with significant empty space
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line plot showing partial dependence
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values correctly on X-axis, partial dependence on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes PDP line, confidence interval band, rug plot, centered at
+          zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "90% Confidence Interval" and "Partial Dependence"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "pdp-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows the PDP curve with variance across the feature range, demonstrates
+          both positive and negative partial dependence effects; could show more dramatic/interpretable
+          patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Uses sklearn GradientBoostingRegressor which is plausible; feature
+          labeling is generic ("Feature 0")
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for centered partial dependence
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current sklearn and highcharts APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves as plot.png correctly, but also saves plot.html (minor: extra
+          output)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Highcharts features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses AreaRangeSeries for confidence bands, plotLines for rug plot
+          and zero reference line; could leverage more interactive features or tooltip
+          customization
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/letsplot.yaml b/plots/pdp-basic/metadata/letsplot.yaml
index 5a2996356c..aeecb1300d 100644
--- a/plots/pdp-basic/metadata/letsplot.yaml
+++ b/plots/pdp-basic/metadata/letsplot.yaml
@@ -25,3 +25,181 @@ review:
     confidence band)
   - The rug plot is very subtle and may be hard to see; could use slightly taller
     segments
+  image_description: The plot displays a Partial Dependence Plot (PDP) for the "Temperature
+    (standardized)" feature. The main PDP line is rendered in bright yellow/gold color
+    showing the average predicted outcome increasing from approximately -25 to +40
+    as temperature increases from -2.5 to 2.5. Multiple light blue ICE (Individual
+    Conditional Expectation) lines are shown fanning out behind the main line, creating
+    a confidence envelope effect. A light blue shaded region (80% confidence band)
+    covers the range from approximately -175 to +175. A subtle rug plot is visible
+    along the bottom near y=0 showing the distribution of training data. The title
+    "pdp-basic · letsplot · pyplots.ai" appears at the top. Axes are clearly labeled
+    with "Temperature (standardized)" on x-axis and "Partial Dependence (predicted
+    outcome)" on y-axis.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~24pt), axis labels are ~20pt, tick labels
+          ~16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Main PDP line is highly visible in yellow, ICE lines appropriately
+          subtle. Minor deduction: ICE lines could be slightly more distinguishable'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow/gold main line contrasts well with blue ICE lines and confidence
+          band; colorblind-safe combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; minimal theme provides clean look. Minor
+          whitespace on edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Temperature (standardized)" and
+          "Partial Dependence (predicted outcome)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and well-styled, but no legend is present to explain
+          the yellow line vs ICE lines vs confidence band
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct partial dependence plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on x-axis, partial dependence on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes confidence band, ICE lines, and rug plot as suggested in
+          spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full feature range displayed with appropriate y-axis scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (self-explanatory elements)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pdp-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows main PDP behavior, uncertainty via ICE lines, data distribution
+          via rug. Shows increasing relationship typical of ML models
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses weather-related features (Temperature, Humidity, Pressure, etc.)
+          from a regression model - neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized feature values are appropriate; partial dependence scale
+          is plausible for regression predictions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data/model → compute PDP → create
+          dataframes → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for model
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot, sklearn components)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current sklearn and lets-plot APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html but path parameter usage is unconventional
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of ggplot grammar with geom_ribbon, geom_line, geom_segment,
+          and theme customization. Could leverage more lets-plot specific features
+          like tooltips for interactive version
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/matplotlib.yaml b/plots/pdp-basic/metadata/matplotlib.yaml
index 68c8b092a2..d0fb33b940 100644
--- a/plots/pdp-basic/metadata/matplotlib.yaml
+++ b/plots/pdp-basic/metadata/matplotlib.yaml
@@ -27,3 +27,186 @@ review:
     high-resolution canvas
   - Y-axis range dominated by wide confidence band making PDP line variation appear
     minimal
+  image_description: 'The plot displays a Partial Dependence Plot (PDP) with a blue
+    color scheme. The main PDP line is a thick dark blue line (#306998) showing a
+    gentle upward slope from approximately -50 at feature value -2 to +40 at feature
+    value +1.7. A light blue 95% confidence band surrounds the main line, spanning
+    from about -300 to +250 in the middle range. Faint ICE (Individual Conditional
+    Expectation) lines are visible as semi-transparent blue curves within the confidence
+    band. A yellow/gold rug plot at the bottom of the chart (at y ≈ -290) shows the
+    distribution of training data values as vertical tick marks, concentrated between
+    -1 and +1 feature values. The title reads "pdp-basic · matplotlib · pyplots.ai"
+    at the top. X-axis is labeled "Feature Value" and Y-axis is labeled "Partial Dependence
+    (Predicted Value)". A legend in the upper left shows three items: 95% Confidence
+    Interval, Partial Dependence, and Data Distribution. The grid is subtle with dashed
+    lines at alpha=0.3.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend is well-positioned in upper
+          left
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Main PDP line is thick (linewidth=4), ICE lines sampled every 10th
+          for clarity, rug plot visible but markers could be slightly larger for the
+          4800x2700 canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) color scheme is colorblind-safe
+          with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, but the rug plot at the very bottom edge
+          creates slight imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Feature Value" and "Partial Dependence (Predicted
+          Value)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3, but legend overlaps with the ICE lines
+          making it slightly distracting
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct PDP visualization showing marginal effect of feature on prediction
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on x-axis, partial dependence on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: PDP line, confidence band, ICE lines,
+          rug plot'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes properly show full range of data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three plot elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pdp-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows PDP with ICE lines showing individual variation, confidence
+          band, and data distribution. Shows the upward trend well but all ICE lines
+          have similar shape (less variation than ideal)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses sklearn's regression data with GradientBoostingRegressor - a
+          standard ML interpretability scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Feature values in -2 to 3 range is reasonable for standardized data,
+          but partial dependence range (-300 to 250) is very wide compared to the
+          actual PDP line movement
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for model
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports are used: matplotlib, numpy, sklearn components'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current sklearn.inspection.partial_dependence API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's fill_between for confidence band, scatter for rug
+          plot, but could leverage more matplotlib-specific features like ax.axhline
+          for a reference line at y=0
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/plotly.yaml b/plots/pdp-basic/metadata/plotly.yaml
index e6a561337f..3bcbe0f089 100644
--- a/plots/pdp-basic/metadata/plotly.yaml
+++ b/plots/pdp-basic/metadata/plotly.yaml
@@ -26,3 +26,183 @@ review:
     Standard Deviation or compute actual 95% CI)
   - The PDP curve shows step-like artifacts suggesting the gradient boosting model
     structure is visible; a smoother curve would be more typical
+  image_description: The plot displays a Partial Dependence Plot (PDP) for BMI (Body
+    Mass Index, standardized) on the x-axis against Partial Dependence (centered)
+    on the y-axis. The main PDP line is shown in dark blue (#306998), showing a clear
+    positive monotonic relationship - as BMI increases, partial dependence increases
+    from approximately -40 at the lowest BMI values to about +60 at the highest. A
+    light blue semi-transparent confidence band (labeled "95% Confidence Interval")
+    surrounds the main line, showing prediction variability. A rug plot consisting
+    of small vertical gray lines near y=50 shows the distribution of training data
+    values along the x-axis, with data concentrated between -0.05 and 0.08. A horizontal
+    dashed gray line at y=0 serves as a reference. The title "pdp-basic · plotly ·
+    pyplots.ai" appears centered at the top. The legend is positioned in the upper-left
+    corner with a white semi-transparent background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 4 is good; confidence band clearly visible; rug plot
+          markers visible though could be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with adequate margins; plot fills canvas well though
+          right side has extra white space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "BMI (Body Mass Index, standardized)" and
+          "Partial Dependence (centered)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend is well-placed with semi-transparent
+          background; however the legend label says "95% Confidence Interval" but
+          this is actually mean ± 1 standard deviation, not a 95% CI
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct PDP visualization type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on x-axis, partial dependence on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has confidence band, rug plot for data distribution, centered PD
+          values (all mentioned in spec notes)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend labels mostly correct but "95% Confidence Interval" is technically
+          inaccurate (it is mean ± 1 std, which is ~68% interval)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "pdp-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive relationship well; however the curve has some unusual
+          step-like artifacts that may indicate grid resolution issues
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses sklearn diabetes dataset - a real, neutral, scientific dataset;
+          BMI effect on diabetes progression is a meaningful relationship
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standardized BMI values and centered PD values are appropriate for
+          interpretation
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → model → PDP computation
+          → plotting → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for model
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Using grid_resolution parameter which works but the centering math
+          could use PDP's built-in centered option
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of plotly.graph_objects with interactive features; fill="toself"
+          for confidence band; proper write_image and write_html output
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/plotnine.yaml b/plots/pdp-basic/metadata/plotnine.yaml
index 7360ae2134..fb642db896 100644
--- a/plots/pdp-basic/metadata/plotnine.yaml
+++ b/plots/pdp-basic/metadata/plotnine.yaml
@@ -22,3 +22,181 @@ review:
   - Missing legend to explain what the shaded confidence band represents
   - The confidence band is very wide, visually overwhelming the actual trend line
   - Rug marks are positioned slightly below the plot area rather than at the axis
+  image_description: The plot displays a partial dependence plot with a dark blue
+    line showing the average predicted outcome across standardized Temperature values
+    (x-axis ranging from approximately -2 to 2). A light blue confidence band (ribbon)
+    surrounds the line, showing the 95% confidence interval derived from ICE curves.
+    The confidence band is quite wide, spanning roughly ±200 units around the mean
+    prediction. A yellow/gold rug plot at the bottom shows the distribution of training
+    data points along the x-axis, with most data concentrated between -1.5 and 1.5.
+    The title "pdp-basic · plotnine · pyplots.ai" appears at the top in bold. The
+    y-axis is labeled "Partial Dependence (avg. prediction)" and the x-axis is labeled
+    "Temperature (standardized)". The plot uses a minimal theme with subtle gray grid
+    lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~24pt), axis labels are clearly readable
+          (~20pt), tick labels are appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is clearly visible with good thickness (size=2), confidence
+          band is appropriately transparent (alpha=0.25), rug marks are visible but
+          could be slightly taller
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) for main elements and gold (#FFD43B) for rug are colorblind-friendly
+          and high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though the confidence band dominates visually
+          due to its width
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Temperature (standardized)" and
+          "Partial Dependence (avg. prediction)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but no legend explaining the confidence
+          band
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct partial dependence plot with line showing marginal effect
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on x-axis, partial dependence on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes confidence interval (as ribbon), rug plot for data distribution
+          as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range of data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-feature PDP (N/A, full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "pdp-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows clear upward trend in partial dependence, confidence band shows
+          variability, but the relationship is relatively linear/simple
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses sklearn GradientBoostingRegressor with synthetic regression
+          data, Temperature as feature name is plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized feature values are appropriate; PD values span a wide
+          range which is realistic for the model
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data/model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for model
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current sklearn and plotnine APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_ribbon, geom_line, geom_segment
+          for rug, and proper theming. Could have used more plotnine-specific features
+          like scale_* customizations
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/pygal.yaml b/plots/pdp-basic/metadata/pygal.yaml
index b9d1e372fe..8c3180e153 100644
--- a/plots/pdp-basic/metadata/pygal.yaml
+++ b/plots/pdp-basic/metadata/pygal.yaml
@@ -27,3 +27,181 @@ review:
   - Rug plot points could be slightly more prominent for better visibility
   - Grid styling is functional but the legend positioning creates visual separation
     from the data
+  image_description: 'The plot displays a Partial Dependence Plot (PDP) with a white
+    background on a 16:9 canvas. The title "pdp-basic · pygal · pyplots.ai" appears
+    at the top in dark gray text. The main visualization shows three lines: a solid
+    dark blue line for "Partial Dependence", and two dashed lines in lighter colors
+    (gold/yellow for "95% CI Upper" and light blue for "95% CI Lower"). The x-axis
+    is labeled "Room Size (standardized)" ranging from approximately -2.4 to 2.4,
+    and the y-axis shows "Partial Dependence" ranging from -30 to 30. The PDP curve
+    shows a monotonic increasing relationship - as room size increases, the partial
+    dependence increases from about -30 to +25. A rug plot showing training data distribution
+    appears at the bottom with small red/maroon dots. The legend is positioned at
+    the bottom with four entries. Light gray grid lines appear in the background.
+    All text is clearly readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes for the 4800x2700 resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and legend entries are distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: PDP line is clearly visible with good stroke width; CI lines are
+          appropriately styled as dashed; rug plot dots visible but could be slightly
+          larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good color choices: blue for main line, gold and light blue for
+          CI bounds are distinguishable; avoids red-green confusion'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot fills majority of area; slight excess
+          margin on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Room Size (standardized)" and
+          "Partial Dependence"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is visible but legend placement at bottom separates it from
+          the data; legend items show small colored dots that are hard to distinguish
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: XY line chart showing partial dependence relationship'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on X-axis, partial dependence on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes PDP line, confidence intervals, and rug plot as recommended
+          in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of feature values and PDP values displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pdp-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows clear monotonic relationship with confidence bands; demonstrates
+          positive correlation; could show more varied relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: '"Room Size" is a plausible feature; standardized values are appropriate;
+          context is somewhat generic'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for standardized features and partial dependence
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current sklearn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html, though this is acceptable for
+          pygal
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart, custom Style, stroke_style for line customization,
+          and legend_at_bottom; could leverage more SVG-specific features
+  verdict: APPROVED
diff --git a/plots/pdp-basic/metadata/seaborn.yaml b/plots/pdp-basic/metadata/seaborn.yaml
index 4bcd2d0f2c..1920dac587 100644
--- a/plots/pdp-basic/metadata/seaborn.yaml
+++ b/plots/pdp-basic/metadata/seaborn.yaml
@@ -27,3 +27,176 @@ review:
   - Could leverage more distinctive seaborn features (e.g., seaborn statistical estimation
     capabilities or figure-level functions)
   - The partial_dependence API from sklearn is older; modern approach uses PartialDependenceDisplay
+  image_description: The plot displays a Partial Dependence Plot (PDP) showing the
+    marginal effect of "Square Feet (standardized)" on the predicted outcome. The
+    main line is solid blue (#306998) with a light blue shaded confidence band (95%
+    CI) around it. The y-axis shows "Partial Dependence (centered)" ranging from approximately
+    -40 to +40. The x-axis ranges from about -2.5 to 3.5. A dashed horizontal gray
+    line at y=0 provides a reference baseline. A yellow/gold rug plot along the bottom
+    x-axis shows the distribution of training data. The title follows the correct
+    format "pdp-basic · seaborn · pyplots.ai". A legend in the upper left clearly
+    identifies the confidence interval and partial dependence line. The overall curve
+    shows a positive monotonic relationship between square feet and partial dependence,
+    which aligns with the housing price scenario.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is appropriate, confidence band visible but not overwhelming
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Square Feet (standardized)" and
+          "Partial Dependence (centered)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend well-placed but grid is slightly too prominent at alpha=0.3
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct PDP visualization with line showing marginal effect
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Feature values on x-axis, partial dependence on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes confidence band, rug plot for data distribution, centered
+          at zero
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of feature values displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies confidence interval and partial dependence
+          line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "pdp-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows monotonic positive relationship clearly; could benefit from
+          showing non-linear effects
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Housing price prediction with Square Feet is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized values appropriate, though raw feature values might
+          be more intuitive
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → model → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set for reproducibility'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using `partial_dependence` directly instead of newer `PartialDependenceDisplay`
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot and sns.rugplot, but these are relatively basic
+          seaborn functions
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/altair.yaml b/plots/phase-diagram/metadata/altair.yaml
index 2d30d08a15..025625a5a8 100644
--- a/plots/phase-diagram/metadata/altair.yaml
+++ b/plots/phase-diagram/metadata/altair.yaml
@@ -25,3 +25,179 @@ review:
   weaknesses:
   - Title format should have spec-id as main title per SC-06 requirement
   - Axis labels lack units (radians and rad/s for physics context)
+  image_description: The plot displays a damped pendulum phase diagram with four distinct
+    trajectories in blue, yellow/gold, red/orange, and green colors. Each trajectory
+    spirals inward toward the equilibrium point at the origin (marked with a black
+    cross symbol). The starting points of each trajectory are marked with filled circular
+    markers. The x-axis is labeled "Position (x)" ranging from approximately -2.0
+    to 2.4, and the y-axis is labeled "Velocity (dx/dt)" ranging from -2.0 to 2.4.
+    The title "Damped Pendulum Phase Space" appears at the top with subtitle "phase-diagram
+    · altair · pyplots.ai". A legend on the right shows "Initial Condition" with four
+    entries (IC 1 through IC 4) with their starting coordinates. The plot has a subtle
+    dashed grid and good use of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is perfectly readable: title ~28pt, subtitle ~20pt, axis
+          labels ~22pt, tick labels ~18pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 2.5 with opacity 0.85 is excellent for showing trajectory
+          paths; starting points clearly marked with large markers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, red-orange, green) with
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills ~60-70% of canvas, well-balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units (Position and Velocity are dimensionless
+          in this context, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.3 and dashed lines; legend well-positioned
+          on right
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position correctly on X, velocity (derivative) correctly on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple trajectories from different initial conditions, shows spiral
+          convergence to equilibrium, equilibrium point marked
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All trajectories fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows initial conditions for each trajectory
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Main title says "Damped Pendulum Phase Space", subtitle has correct
+          format but spec requires "{spec-id} · {library} · pyplots.ai" as the title
+          format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple trajectories, spiral convergence (damping), equilibrium
+          point, different initial conditions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped pendulum is a classic physics example mentioned in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for pendulum phase space (angles in radians,
+          angular velocities)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with scale_factor=3.0
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair''s declarative grammar: layered charts,
+          order encoding for proper line ordering, detail encoding for grouping, Title
+          with subtitle, configure methods for styling, interactive HTML export'
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/bokeh.yaml b/plots/phase-diagram/metadata/bokeh.yaml
index b8d8359f55..df686c44b8 100644
--- a/plots/phase-diagram/metadata/bokeh.yaml
+++ b/plots/phase-diagram/metadata/bokeh.yaml
@@ -25,3 +25,186 @@ review:
     {library} · pyplots.ai
   - Legend text appears small in the rendered output despite font size settings
   - Could add HoverTool for interactivity to show time/position on hover
+  image_description: The plot shows a phase diagram of a damped pendulum system displaying
+    four spiraling trajectories converging toward a stable equilibrium at the origin.
+    The trajectories are rendered as scatter points with a Viridis color gradient
+    indicating time evolution (purple/blue for early times, yellow for later times).
+    Four distinct starting points are marked with larger colored circles (blue, yellow,
+    red, green) with white borders. A red 'X' marks the equilibrium point at the origin.
+    A horizontal dashed gray line indicates the zero velocity line (dx/dt = 0). The
+    title reads "Damped Pendulum · phase-diagram · bokeh · pyplots.ai" in the top
+    left. Axis labels show "Position x (displacement)" and "Velocity dx/dt (m/s)".
+    A color bar on the right shows "Time (s)" ranging from 0-16. The legend in the
+    top right lists all trajectory starting points, equilibrium, and the zero velocity
+    line. Background is light gray (#fafafa).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Scatter points sized appropriately (size=12) for the data density;
+          trajectories clearly visible. Starting markers are prominent (size=30).
+          Minor: some inner spiral points overlap at convergence'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis palette is colorblind-safe; trajectory starting colors are
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization; plot fills most of the space. Minor: legend
+          slightly crowded in top-right'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Position x (displacement)" and "Velocity
+          dx/dt (m/s)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend text is quite small and hard
+          to read despite label_text_font_size="20pt" claim
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram plotting x vs dx/dt
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position on X-axis, velocity on Y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple trajectories, fixed point marked, time evolution shown via
+          color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All trajectories fully visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies starting points, equilibrium, and zero
+          velocity line
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Title is "Damped Pendulum · phase-diagram · bokeh · pyplots.ai" but
+          format should be "{spec-id} · {library} · pyplots.ai" = "phase-diagram ·
+          bokeh · pyplots.ai" without the extra descriptor
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple trajectories from different initial conditions, spiral
+          convergence to equilibrium, demonstrates damped oscillator behavior excellently
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped harmonic oscillator is a classic physics example, neutral
+          educational topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Parameters (omega=2.0, gamma=0.3) produce realistic pendulum dynamics
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set (though not strictly needed for this deterministic
+          simulation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html, which is fine, but the sys.path
+          manipulation is unusual (though necessary for the module naming issue)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, ColorBar, LinearColorMapper, interactive tools.
+          Good use of Bokeh features but could leverage more (e.g., HoverTool to show
+          trajectory info on hover)
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/highcharts.yaml b/plots/phase-diagram/metadata/highcharts.yaml
index fea5ac20d0..51a8a9c625 100644
--- a/plots/phase-diagram/metadata/highcharts.yaml
+++ b/plots/phase-diagram/metadata/highcharts.yaml
@@ -25,3 +25,173 @@ review:
   - Specification suggests adding direction arrows or color gradient to show time
     evolution - not implemented
   - Y-axis tick labels are somewhat crowded with 0.25 increments
+  image_description: 'The plot displays a phase diagram showing three spiral trajectories
+    converging toward a central equilibrium point. The title "phase-diagram · highcharts
+    · pyplots.ai" appears at the top in bold, with a subtitle "Damped Harmonic Oscillator
+    - Phase Space Trajectories". The x-axis is labeled "Position x" (ranging from
+    approximately -2.4 to 3), and the y-axis is labeled "Velocity dx/dt" (ranging
+    from approximately -6 to 4.75). Three distinct colored trajectories are shown:
+    blue (High energy start), yellow (Medium energy start), and purple (Low energy
+    start), all spiraling inward toward the origin. A red diamond marker indicates
+    the equilibrium (fixed point) at the origin. The legend is positioned on the right
+    side with clear labels. Grid lines are light gray, and dashed reference lines
+    mark the zero axes. The background is white.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable; axis tick labels are appropriately
+          sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Trajectories are visible with good line width; markers are small
+          but line connections make trajectories clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and purple palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though trajectories are slightly offset from
+          center
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Position x", "Velocity dx/dt") but without units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha, well-placed legend on right side
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position on x-axis, velocity on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple trajectories, fixed point marked, spiral convergence shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all trajectory data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all series
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but could be more prominent
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows damped oscillation, multiple initial conditions, fixed point;
+          could add direction arrows
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped harmonic oscillator is a classic physics example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are appropriate for a typical oscillator system
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Using strict=True in zip is modern but unnecessary complexity
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses Highcharts scatter with line connection, plotLines for reference
+          axes, custom markers for equilibrium
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/letsplot.yaml b/plots/phase-diagram/metadata/letsplot.yaml
index 2c6b870afb..8d14407c23 100644
--- a/plots/phase-diagram/metadata/letsplot.yaml
+++ b/plots/phase-diagram/metadata/letsplot.yaml
@@ -28,3 +28,181 @@ review:
     improve readability
   - Inner spiral loops become slightly dense near equilibrium; direction arrows could
     show time evolution more clearly
+  image_description: The plot displays a phase diagram (state space plot) showing
+    four spiral trajectories converging to a fixed point at the origin. The trajectories
+    are colored in blue, yellow, green, and purple, each starting from different initial
+    conditions marked with hollow circle markers. The fixed point (equilibrium) at
+    the origin is marked with a bold red "X". The x-axis is labeled "Position (x)"
+    ranging from approximately -2.5 to 3, and the y-axis is labeled "Velocity (dx/dt)"
+    ranging from approximately -6 to 5. The title "phase-diagram · letsplot · pyplots.ai"
+    appears at the top. A legend on the right side shows "Starting Condition" with
+    four entries corresponding to each trajectory's initial (x, v) values. The plot
+    has a clean minimal theme with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend text are all clearly readable
+          at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and legend entries are clearly
+          separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Trajectory lines are well-sized with good alpha (0.8), starting points
+          clearly marked; minor deduction as some inner spiral loops are slightly
+          dense
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue, yellow, green, purple palette which is colorblind-friendly;
+          good contrast between all colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned appropriately
+          on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with proper notation: "Position (x)" and "Velocity
+          (dx/dt)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is visible and subtle, but legend title "Starting Condition"
+          is slightly generic; legend entries could be more descriptive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt trajectories
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position correctly mapped to x-axis, velocity (derivative) to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows multiple trajectories, fixed point marked, spiral convergence
+          visible, direction of time evolution implied by path
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies each trajectory by initial condition
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "phase-diagram · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows spiral convergence (damped system), multiple initial conditions,
+          equilibrium point; could show a limit cycle or undamped case for contrast
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped harmonic oscillator is a classic physics example; neutral
+          scientific context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are physically reasonable for a normalized oscillator; omega=2,
+          gamma=0.3 produce sensible dynamics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not actually used since data is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses lets-plot features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_path for trajectories, geom_point with
+          shape/stroke for markers, scale_color_manual, theme_minimal with customization;
+          could leverage more lets-plot specific interactive features
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/matplotlib.yaml b/plots/phase-diagram/metadata/matplotlib.yaml
index 1ee1aa66b4..aafff05a13 100644
--- a/plots/phase-diagram/metadata/matplotlib.yaml
+++ b/plots/phase-diagram/metadata/matplotlib.yaml
@@ -24,3 +24,181 @@ review:
   - Axis labels lack units (e.g., "Position x (m)" or "(arbitrary units)" would be
     clearer)
   - Legend fontsize (14) could be increased to 16 for consistency with tick labels
+  image_description: 'The plot displays a phase diagram for a damped harmonic oscillator
+    showing four spiral trajectories converging toward the origin (equilibrium point).
+    The trajectories are colored in a distinct palette: blue (Python Blue #306998),
+    yellow, coral/red, and teal. Each trajectory starts from a large circular marker
+    indicating initial conditions and spirals clockwise inward toward the origin,
+    where a large black X marks the stable equilibrium point. Direction arrows are
+    placed along each trajectory showing the time evolution. The x-axis is labeled
+    "Position x" and the y-axis "Velocity dx/dt". The title reads "Damped Oscillator
+    · phase-diagram · matplotlib · pyplots.ai". A legend in the upper right identifies
+    each trajectory with its amplitude parameter. Subtle gray dashed reference lines
+    mark the axes at x=0 and y=0. The plot uses equal aspect ratio with axis limits
+    from -4 to 4 (x) and -3.5 to 3.5 (y).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, legend 14pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 2.5, start markers s=250, arrows visible, all appropriately
+          sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, coral, teal palette is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with equal aspect ratio; slight excess whitespace
+          in corners
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (Position x vs "Position x (m)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha 0.3 is good; legend placed well but slightly small at
+          fontsize=14
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position on x-axis, velocity on y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple trajectories, direction arrows, equilibrium point, spiral
+          convergence all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All trajectories fully visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all trajectories with amplitudes
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "Damped Oscillator · phase-diagram · matplotlib
+          · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows spiral convergence to equilibrium, multiple initial conditions,
+          direction arrows; could show both underdamped and critically damped for
+          contrast
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped harmonic oscillator is a classic physics example, neutral
+          and educational
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for a normalized oscillator; dimensionless
+          units work but real-world units would be better
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → parameters → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic analytical solution, no random data used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300, bbox_inches='tight'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of annotate with arrowprops, scatter with edge colors, axhline/axvline;
+          could leverage matplotlib's FancyArrowPatch or quiver for more sophisticated
+          direction arrows
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/plotly.yaml b/plots/phase-diagram/metadata/plotly.yaml
index 2f385af6da..8feae21058 100644
--- a/plots/phase-diagram/metadata/plotly.yaml
+++ b/plots/phase-diagram/metadata/plotly.yaml
@@ -31,3 +31,185 @@ review:
     diagrams)
   - Does not fully leverage Plotly animation features that could show trajectory evolution
     over time
+  image_description: The plot displays a phase diagram of a damped harmonic oscillator
+    with two spiral trajectories converging to a stable equilibrium point at the origin.
+    The first trajectory (blue, dashed line) starts at position x=2.0 with zero initial
+    velocity and spirals clockwise inward. The second trajectory (yellow/orange, dashed
+    line) starts at x=-1.5 with initial velocity v₀=3.0, creating a larger spiral
+    that also converges to the origin. A red "X" marker indicates the fixed point
+    (stable equilibrium) at the center. Green circular markers show the initial conditions
+    for both trajectories. Direction arrows along both paths indicate the time evolution.
+    The plot has a clean white background with subtle gray gridlines, properly labeled
+    axes ("Position x" and "Velocity dx/dt"), and a well-positioned legend in the
+    upper left corner showing all four trace types.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend, title, and labels are well
+          separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Trajectories clearly visible with good line width (3); markers sized
+          appropriately; slight deduction as markers are small (size=4) on the trajectory
+          lines
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned near
+          data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but without units (could be "Position x (m)" or
+          similar, though dimensionless is acceptable for phase diagrams)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha 0.1 (could be slightly more visible at 0.2-0.3);
+          legend well placed with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position on X-axis, velocity (derivative) on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple trajectories, fixed point marked, direction arrows, color
+          gradient for time evolution all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately describes all traces including initial conditions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Damped Harmonic Oscillator · phase-diagram
+          · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple trajectories from different initial conditions, spiral
+          convergence to equilibrium, direction of flow - demonstrates all key phase
+          diagram features
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Damped pendulum is an excellent physics example; parameters are realistic;
+          slight deduction for generic "Position x" instead of physical interpretation
+          like "Angular displacement"
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Damping ratio ζ=0.15 (underdamped), natural frequency ω₀=2.0, initial
+          conditions all physically reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data generation → plotting → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not actually used since data is analytical)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with hover templates (good), but could better leverage
+          Plotly's animation capabilities for showing time evolution dynamically,
+          or use go.Scattergl for performance
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/plotnine.yaml b/plots/phase-diagram/metadata/plotnine.yaml
index 098a3484ef..d03100766d 100644
--- a/plots/phase-diagram/metadata/plotnine.yaml
+++ b/plots/phase-diagram/metadata/plotnine.yaml
@@ -26,3 +26,177 @@ review:
     (m/s))
   - Only single trajectory shown; spec notes that multiple trajectories from different
     initial conditions could reveal basin of attraction structure
+  image_description: The plot shows a phase diagram (state space plot) displaying
+    a damped harmonic oscillator trajectory. The visualization features a spiral pattern
+    starting from the outer edge (at position x≈2, velocity≈0) and spiraling inward
+    towards the origin (equilibrium point). The color gradient transitions from dark
+    blue (early time t=0) through greenish tones to yellow (later time t≈8), clearly
+    showing the time evolution. A dark blue circular marker indicates the starting
+    point, and a yellow square marker shows the endpoint near the origin. Dashed reference
+    lines mark the x=0 and dx/dt=0 axes. The title "phase-diagram · plotnine · pyplots.ai"
+    appears at the top. Axis labels show "Position x" and "Velocity dx/dt". A colorbar
+    legend on the right shows "Time (s)" ranging from 0 to 8. The background is clean
+    with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size with appropriate font sizing
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Trajectory line is clearly visible with good thickness; start/end
+          markers clearly distinguishable; slight deduction as the trajectory gets
+          quite dense near origin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-friendly (viridis-like)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but missing units (could be "Position x (m)" and
+          "Velocity dx/dt (m/s)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with appropriate alpha, legend is well positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt trajectory
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position on x-axis, velocity on y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows damped oscillator trajectory, spiral convergence to equilibrium,
+          time evolution via color gradient, reference lines at zero crossings
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes properly show all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Time legend is accurate and descriptive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "phase-diagram · plotnine · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows damped oscillator well but spec mentions "multiple trajectories
+          from different initial conditions" as an option that could enhance the visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped harmonic oscillator is a classic physics example, exactly
+          as suggested in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are physically reasonable; 800 points provide a smooth trajectory
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine grammar (ggplot, aes, geom_path, scale_color_gradient,
+          theme customization), but could leverage more advanced features like annotations
+          or faceting
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/pygal.yaml b/plots/phase-diagram/metadata/pygal.yaml
index 68aafb7c1f..2b92b0b0a1 100644
--- a/plots/phase-diagram/metadata/pygal.yaml
+++ b/plots/phase-diagram/metadata/pygal.yaml
@@ -24,3 +24,15 @@ review:
   - The equilibrium point marker at origin is quite small and could be more prominent
   - The undamped oscillator trajectory shows some numerical drift (not a perfect ellipse
     due to Euler method)
+  image_description: 'The plot displays a phase diagram showing two oscillator trajectories
+    on a white background. The title "phase-diagram · pygal · pyplots.ai" appears
+    at the top in dark gray text. The X-axis is labeled "Position x" (range -1.2 to
+    2) and the Y-axis is labeled "Velocity dx/dt" (range -3 to 3). There are two main
+    trajectories: (1) A blue spiral trajectory ("Damped Oscillator γ=0.3") that starts
+    from the outer region and spirals inward toward the origin, demonstrating damped
+    oscillation converging to equilibrium; (2) A yellow elliptical trajectory ("Undamped
+    Oscillator (Limit Cycle)") that forms a closed loop, representing periodic motion
+    without energy loss. A small red dot at the origin marks the "Equilibrium (Fixed
+    Point)". The legend is displayed at the bottom with colored squares indicating
+    each series. Grid lines are shown in light gray.'
+  verdict: APPROVED
diff --git a/plots/phase-diagram/metadata/seaborn.yaml b/plots/phase-diagram/metadata/seaborn.yaml
index 9a5408db1f..0f779b011c 100644
--- a/plots/phase-diagram/metadata/seaborn.yaml
+++ b/plots/phase-diagram/metadata/seaborn.yaml
@@ -28,3 +28,181 @@ review:
     separately and its legend entry appears disconnected
   - Inner portions of spirals become dense and harder to distinguish near the origin
   - Could benefit from using a colorblind-friendly palette instead of custom colors
+  image_description: 'The plot shows a phase diagram (state space plot) for a damped
+    harmonic oscillator. Four spiral trajectories are displayed, each starting from
+    different initial conditions marked by large colored dots: blue (2.0, 0.0), yellow
+    (0.0, 8.0), red/orange (-1.5, -5.0), and teal (1.0, 4.0). All trajectories spiral
+    inward toward the equilibrium point at the origin, marked with a black ''X''.
+    The x-axis shows Position (x) ranging from about -1.5 to 2.0, and the y-axis shows
+    Velocity (dx/dt) ranging from about -10 to 7.5. Dashed gray reference lines cross
+    at the origin. Direction arrows are visible on the trajectories showing time evolution.
+    The title follows the correct format: "phase-diagram · seaborn · pyplots.ai".
+    A legend in the upper right identifies each trajectory by its initial condition.
+    The background uses seaborn''s whitegrid style.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend is well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Trajectories clearly visible with good linewidth, starting points
+          marked well, but some inner spiral portions get dense
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, red-orange, teal) provide good
+          contrast; yellow-teal distinction could be slightly better for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, plot fills space well with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Position (x)" and "Velocity (dx/dt)" are descriptive with units
+          indicated'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is visible but legend entry for "Equilibrium" is displayed despite
+          not being added properly to the hue legend (uses separate scatter)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phase diagram showing x vs dx/dt trajectories
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Position on x-axis, velocity on y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple trajectories, direction arrows, fixed point, different initial
+          conditions - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full trajectory ranges without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies each trajectory by initial condition
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "phase-diagram · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows spiral convergence to equilibrium, multiple initial conditions
+          with varying starting positions and velocities; could show limit cycle or
+          unstable case for contrast
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped harmonic oscillator is a classic physics example, excellent
+          educational context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for a physics simulation; omega and zeta parameters
+          produce realistic behavior
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn and matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves as plot.png (note: correcting - this should be 1/1)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot with hue grouping and sns.set_style, but direction
+          arrows are done with matplotlib's annotate rather than leveraging seaborn
+          further
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/altair.yaml b/plots/pie-basic/metadata/altair.yaml
index 91874553ac..554b41d886 100644
--- a/plots/pie-basic/metadata/altair.yaml
+++ b/plots/pie-basic/metadata/altair.yaml
@@ -23,3 +23,165 @@ review:
   - Missing exploded slice for emphasis on key category (mentioned in spec notes)
   - Title could include a descriptive subtitle for context
   - Pie chart slightly off-center due to legend placement
+  image_description: 'The pie chart displays a "Budget allocation by department" scenario
+    with 6 department slices. The dominant slice is Engineering (Python Blue, #306998)
+    at 35.0%, followed by Marketing (teal, 20.0%), Operations (coral, 18.0%), Sales
+    (coral/pink, 15.0%), HR (yellow, 7.0%), and R&D (light teal, 5.0%). Percentage
+    labels are positioned outside each slice in bold black text. A legend titled "Department"
+    is placed on the right side listing all categories. White strokes separate the
+    slices clearly. The title "pie-basic · altair · pyplots.ai" is centered at the
+    top. The chart uses a square format with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, percentage labels at 20pt, legend labels at 18pt -
+          all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized with good radius, white stroke provides
+          excellent separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good distinction between categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though the pie is slightly off-center due to legend
+          placement
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right, appropriately sized symbols
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category labels and values correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has percentage labels, distinct colors, legend; missing slight explosion
+          for emphasis on key slice (spec note)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, proportions accurate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match categories exactly
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but missing descriptive subtitle
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied slice sizes (large to small), but all slices are similar
+          style (no exploded slice)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, realistic department budget distribution
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but no random seed comment/documentation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative encoding, theta encoding, layering,
+          tooltips, but could use interactivity more
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/bokeh.yaml b/plots/pie-basic/metadata/bokeh.yaml
index 7bcf6184c0..63cb05f1d1 100644
--- a/plots/pie-basic/metadata/bokeh.yaml
+++ b/plots/pie-basic/metadata/bokeh.yaml
@@ -25,3 +25,178 @@ review:
   - Legend appears relatively small compared to the large canvas size; legend text
     could be scaled up for better readability at full resolution
   - Code uses manual loops instead of ColumnDataSource which is more idiomatic Bokeh
+  image_description: The plot shows a pie chart with 5 slices representing budget
+    allocation by department. The largest slice (Engineering, 35%) is in Python blue
+    (#306998) and is slightly exploded/separated from the center. The second largest
+    is Marketing (25%) in bright yellow (#FFD43B), followed by Sales (20%) in red,
+    Operations (12%) in purple, and HR (8%) in green. Each slice has a percentage
+    label displayed on it - white text for darker slices and dark text for the yellow
+    slice. The title "pie-basic · bokeh · pyplots.ai" appears at the top. A legend
+    on the right side lists all categories with their percentages. The pie chart is
+    centered in a square 3600x3600 canvas with clean white background and no axes/grid
+    (appropriate for pie charts).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 40pt, percentage labels at 32pt, legend at 28pt - all clearly
+          readable, though legend text could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-positioned within slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good sizing, white borders separate
+          slices well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors that work for colorblind users (blue, yellow, red,
+          purple, green - no red-green only distinction)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, pie centered well, though legend placement creates
+          some asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for pie charts, handled correctly by hiding axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is functional but appears small relative to the large canvas,
+          could be better integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: percentage labels, distinct colors, legend,
+          slight explosion on key slice'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, slices properly sized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "pie-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varied proportions, explosion effect on largest
+          slice; could show more variation in smaller slices
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages are reasonable (35%, 25%, 20%, 12%, 8% = 100%), though
+          distribution is somewhat standard
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Most imports used, though `output_file` and `save` are only used
+          for HTML output
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh wedge glyphs and Label annotations correctly, generates
+          both PNG and interactive HTML; however, could leverage ColumnDataSource
+          for more idiomatic Bokeh code
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/highcharts.yaml b/plots/pie-basic/metadata/highcharts.yaml
index 3f9cb2ea0e..1a1173fad1 100644
--- a/plots/pie-basic/metadata/highcharts.yaml
+++ b/plots/pie-basic/metadata/highcharts.yaml
@@ -25,3 +25,166 @@ review:
     with excess whitespace on right
   - Data labels could benefit from slightly larger connector line distance for smaller
     slices
+  image_description: 'The plot displays a pie chart with 5 slices representing market
+    share distribution. The title "pie-basic · highcharts · pyplots.ai" is shown at
+    the top. The pie is positioned slightly left of center with a vertical legend
+    on the right side. Colors used are: blue (#306998) for Product A (35.0%), yellow
+    (#FFD43B) for Product B (25.0%), purple (#9467BD) for Product C (20.0%), cyan
+    (#17BECF) for Product D (12.0%), and brown (#8C564B) for Product E (8.0%). Product
+    A (the largest slice) is exploded/offset from the pie for emphasis. Each slice
+    has a data label showing the category name and percentage with connector lines.
+    The layout uses a square 3600x3600 format appropriate for pie charts.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, data labels at 32px, legend at 36px - all perfectly
+          readable at 3600x3600
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, data labels well-spaced with connector lines
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices clearly visible with good proportions, 70% size appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette avoiding red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall but pie shifted left (40% center) creates some asymmetry
+          with large right-side whitespace
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-positioned on right, clear formatting
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentage labels ✓, distinct colors ✓, legend ✓, exploded slice
+          for emphasis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible and proportional
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "pie-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying proportions, exploded slice; could
+          show more variation in slice sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share distribution is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values sum to 100%, realistic market share percentages
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart config → series
+          → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random), but no seed comment; acceptable
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses Highcharts-specific features: allowPointSelect, slicedOffset
+          for explosion, showInLegend, connector lines, point selection'
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/letsplot.yaml b/plots/pie-basic/metadata/letsplot.yaml
index 8aa11e9089..94d78fc80a 100644
--- a/plots/pie-basic/metadata/letsplot.yaml
+++ b/plots/pie-basic/metadata/letsplot.yaml
@@ -21,3 +21,163 @@ review:
   weaknesses:
   - No slice explosion for emphasis on key slice as suggested in spec notes
   - Layout slightly unbalanced with pie shifted left relative to legend
+  image_description: 'The plot displays a pie chart showing market share distribution
+    across 5 companies. Company A (Python Blue #306998) dominates with 35%, followed
+    by Company B (Yellow #FFD43B) at 25%, Company C (Green #4CAF50) at 20%, Company
+    D (Orange #FF7043) at 12%, and Company E (Purple #AB47BC) at 8%. Each slice is
+    labeled with its percentage value. A vertical legend on the right identifies each
+    company by color. The title "pie-basic · letsplot · pyplots.ai" is centered at
+    the top in a readable font size.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and readable, percentage labels are clear, legend
+          text is appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; percentage labels are well-positioned
+          on slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized with good proportions, all slices visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinct and colorblind-safe; no red-green confusion
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout, but pie is shifted slightly left creating some
+          imbalance with the legend on the right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is clear and well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes percentage labels, distinct colors, and legend as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 categories with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "pie-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in slice sizes; however, no slice explosion for emphasis
+          as suggested in spec notes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by company is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic market share percentages totaling 100%
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses export_ggsave instead of ggsave (minor issue, but works correctly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_pie with layer_labels() which is lets-plot specific, but
+          could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/matplotlib.yaml b/plots/pie-basic/metadata/matplotlib.yaml
index 5cca8ef26f..bffd824eee 100644
--- a/plots/pie-basic/metadata/matplotlib.yaml
+++ b/plots/pie-basic/metadata/matplotlib.yaml
@@ -25,3 +25,168 @@ review:
   - Generic product names instead of more realistic category names
   - Could use shadow or additional visual enhancements
   - Legend title is generic - could match data context
+  image_description: 'The plot displays a pie chart with 5 slices representing market
+    share distribution for Products A through E. The largest slice (Product A at 35%)
+    is slightly exploded/offset from the center for emphasis. Colors used are: deep
+    blue (#306998) for Product A, golden yellow (#FFD43B) for Product B, teal (#4ECDC4)
+    for Product C, coral red (#FF6B6B) for Product D, and gray (#95A5A6) for Product
+    E. Each slice has a bold white percentage label centered within it. Category labels
+    (Product A-E) appear around the perimeter of the pie. A legend is positioned on
+    the right side with "Categories" as its title. The title "pie-basic · matplotlib
+    · pyplots.ai" appears at the top. White edge lines separate each slice for visual
+    clarity. The chart uses a square format appropriate for pie charts.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, labels at 20pt, percentages at 18pt bold - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels and percentages clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Slices well-sized, white edges provide clear separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinct and colorblind-safe (blue, yellow, teal, red,
+          gray)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though legend slightly extends canvas
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, no grid needed for pie charts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has percentage labels, distinct colors, legend, and slight explosion
+          for emphasis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "pie-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying slice sizes (35%, 25%, 20%, 12%, 8%), good range of
+          proportions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share distribution is a real, common scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable; could benefit from more realistic product
+          names
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but no random seed comment (data doesn't use
+          random)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot imported, nothing unused
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses wedgeprops for edge styling, explode parameter, autopct formatting,
+          but could use shadow or more advanced features
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/plotly.yaml b/plots/pie-basic/metadata/plotly.yaml
index 1bfde8a422..933e5dea3f 100644
--- a/plots/pie-basic/metadata/plotly.yaml
+++ b/plots/pie-basic/metadata/plotly.yaml
@@ -28,3 +28,168 @@ review:
   - Could use plotly hovertemplate for richer interactivity in the HTML version
   - Slice size distribution is fairly uniform (35/25/20/12/8) - more extreme variation
     would better demonstrate the plot type ability to show disparate proportions
+  image_description: The plot displays a pie chart showing "Budget allocation by department"
+    with 5 distinct slices. The Engineering slice (35%) is in Python Blue (#306998)
+    and is slightly exploded/pulled out for emphasis. Marketing (25%) is bright yellow,
+    Sales (20%) is teal, Operations (12%) is coral/salmon, and HR (8%) is light green.
+    Each slice displays both the category name and percentage directly on it. The
+    title "pie-basic · plotly · pyplots.ai" is centered at the top in a clean font.
+    A vertical legend on the right side lists all five departments with their corresponding
+    color squares. White borders cleanly separate each slice. The overall layout is
+    clean with a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, slice labels at 20pt, legend at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fit within slices or are clearly
+          positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized and clearly visible with good proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with distinct hues (blue, yellow, teal, coral,
+          green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall, but slight imbalance with empty space on left vs legend
+          on right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right, clean styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has percentage labels, distinct colors, legend, and slight explosion
+          for emphasis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pie-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying sizes, but could show more variation
+          in slice sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a classic, realistic use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (35%, 25%, 20%, 12%, 8%) are realistic department budget proportions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Pie with pull parameter for explosion, textinfo for labels,
+          and generates interactive HTML. Could leverage more plotly-specific features
+          like hover templates.
+  verdict: APPROVED
diff --git a/plots/pie-basic/metadata/pygal.yaml b/plots/pie-basic/metadata/pygal.yaml
index a65f6fb17e..5ecadc0696 100644
--- a/plots/pie-basic/metadata/pygal.yaml
+++ b/plots/pie-basic/metadata/pygal.yaml
@@ -24,3 +24,166 @@ review:
     legend_font_size for better readability
   - Missing slight explosion for emphasis on key slice as suggested in the specification
     notes
+  image_description: 'The plot displays a pie chart showing market share distribution
+    across 5 products. The chart uses a square format (3600x3600px) with distinct
+    colors: blue (#306998) for Product A (35%), yellow (#FFD43B) for Product B (25%),
+    teal (#4ECDC4) for Product C (20%), coral/red (#FF6B6B) for Product D (12%), and
+    mint green (#95E1D3) for Product E (8%). Each slice displays its percentage value
+    centered within the slice. The title "pie-basic · pygal · pyplots.ai" appears
+    at the top in dark gray text. A horizontal legend at the bottom identifies all
+    5 products with their corresponding color indicators arranged in 5 columns.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title and percentage labels are clearly readable at the large canvas
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; percentages are well-positioned within
+          slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-proportioned and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinct and colorblind-friendly (no red-green as only
+          difference)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout, but legend at bottom appears small relative
+          to chart
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend text appears quite small compared to the chart; could be more
+          prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes percentage labels, distinct colors, and legend as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "pie-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows different sized slices demonstrating proportions, but missing
+          "explosion" feature mentioned in spec notes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share distribution is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (35%, 25%, 20%, 12%, 8%) are realistic for market share
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization and render_to_png/render_to_file,
+          but doesn't use unique features like hover interactivity or half-pie mode
+  verdict: APPROVED
diff --git a/plots/pie-drilldown/metadata/altair.yaml b/plots/pie-drilldown/metadata/altair.yaml
index ff285f0245..e3cf63ad65 100644
--- a/plots/pie-drilldown/metadata/altair.yaml
+++ b/plots/pie-drilldown/metadata/altair.yaml
@@ -25,3 +25,152 @@ review:
     instead of one level at a time with click to reveal)
   - Uses helper function violating KISS principle (no functions/classes rule)
   - Minor text truncation on DevOps label in Engineering drilldown
+  image_description: |-
+    The plot displays a central donut chart showing "Company Budget Breakdown with Drilldown Navigation" with the subtitle "pie-drilldown · altair · pyplots.ai". The main central pie shows four departments: Engineering ($1.75M, 54.8% in blue), Marketing ($0.74M, 23.1% in orange), Operations ($0.48M, 15.2% in teal), and HR ($0.22M, 6.9% in purple). The center displays "Total Budget $3.20M".
+
+    Four smaller drilldown pie charts surround the main chart - Engineering breakdown (left top: Frontend, Backend, DevOps), Operations breakdown (left bottom: Facilities, IT Support), Marketing breakdown (right top: Digital, Content, Events), and HR breakdown (right bottom: Recruiting, Training). Each drilldown shows percentages and dollar amounts. A breadcrumb appears at top: "All Departments › Click any slice to explore team breakdown". An instruction line at the bottom explains interactivity. The colors use consistent shades within each department's drilldown. All text is legible and the layout effectively shows the hierarchical relationship.
+  criteria_checklist:
+    visual_quality:
+      score: 33
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: All text readable, though some drilldown labels slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Minor issue: the "DevOps: $500K (28.5%)" label gets cut off'
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: All pies well-sized, good donut proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color scheme with distinct hues, shades differentiate within
+          groups
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent layout with center main chart and drilldowns on sides
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed; inline labels work well
+    spec_compliance:
+      score: 20
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart with hierarchical drilldown
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to slice sizes
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Has breadcrumb and hierarchy display; but lacks true click navigation
+          (shows all levels at once rather than drilling on click)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses subtitle format, but main title doesn't include spec-id
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 2-level hierarchy, multiple categories per level
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is excellent real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values are realistic for company departments
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper function (create_drilldown), not pure KISS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair syntax
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative composition, selection parameters,
+          tooltips, and vconcat/hconcat for layout
+  verdict: APPROVED
diff --git a/plots/pie-drilldown/metadata/bokeh.yaml b/plots/pie-drilldown/metadata/bokeh.yaml
index a8d9baa818..21f771100a 100644
--- a/plots/pie-drilldown/metadata/bokeh.yaml
+++ b/plots/pie-drilldown/metadata/bokeh.yaml
@@ -29,3 +29,170 @@ review:
     shades
   - The Human Resources slice label text is partially difficult to read due to smaller
     slice size and label positioning
+  image_description: |-
+    The plot displays a pie chart for "Total Expenses" with 4 slices representing company departments. The title "pie-drilldown · bokeh · pyplots.ai" appears at the top in blue. A breadcrumb showing "Total Expenses" is positioned above the pie. The four slices are:
+    - **Operations**: $1320K (34.1%) - Large teal/green slice on the right
+    - **Marketing**: $900K (23.3%) - Teal/green slice at the bottom
+    - **Research**: $1000K (25.8%) - Pink/magenta slice on the left
+    - **Human Resources**: $650K (16.8%) - Coral/red slice at the top-left
+
+    Each slice has white bold text labels showing category name, dollar value, and percentage. The text "Click a slice to drill down" appears at the bottom in gray. The background is a light off-white (#fafafa). The pie chart is well-proportioned and fills a good portion of the canvas (square 3600x3600 format).
+  criteria_checklist:
+    visual_quality:
+      score: 33
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Labels are readable with 30pt bold font, though white text on lighter
+          teal/green (Marketing slice) has reduced contrast
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels well-positioned within slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized with radius 0.9, good use of canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Two slices use similar teal/green shades (Operations and Marketing),
+          making them harder to distinguish. The colorblind-safe palette was partially
+          applied but colors repeat too early
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, pie fills canvas well, but slight imbalance with
+          more whitespace at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed, labels serve as legend
+    spec_compliance:
+      score: 19
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart with drilldown capability
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to slice sizes
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: false
+        comment: 'Has breadcrumb, click indicator, labels with % and values. Missing:
+          animated transitions (static PNG cannot show this, but HTML should), consistent
+          color schemes within branches (colors cycle rather than branch-consistent)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: No traditional legend, but labels are embedded. The spec does not
+          require a legend, so this is acceptable
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pie-drilldown · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 categories at root level, hierarchical structure with 3-4
+          levels. Missing demonstration of all drill-down levels in static image
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company expense breakdown is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in hundreds of thousands are realistic for company expenses
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code uses inline structure but is complex with JavaScript callbacks,
+          multiple sources; not simple KISS style
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed as data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Bokeh's TapTool, CustomJS callbacks, ColumnDataSource
+          for interactivity, export_png for static output, and HTML save for interactive
+          version
+  verdict: APPROVED
diff --git a/plots/pie-drilldown/metadata/highcharts.yaml b/plots/pie-drilldown/metadata/highcharts.yaml
index 5e07c94e20..914b5d984e 100644
--- a/plots/pie-drilldown/metadata/highcharts.yaml
+++ b/plots/pie-drilldown/metadata/highcharts.yaml
@@ -22,3 +22,169 @@ review:
   weaknesses:
   - Static PNG cannot demonstrate the core drilldown interactivity (inherent limitation)
   - Legend positioned far right could be closer to the chart for better visual grouping
+  image_description: 'The plot displays a pie chart showing "Company Revenue by Department"
+    with 5 slices. The title "pie-drilldown · highcharts · pyplots.ai" appears at
+    the top with a subtitle indicating click-to-drill-down functionality. The slices
+    are colored: dark blue (Engineering: $4,500,000, 34.6%), golden yellow (Sales:
+    $3,200,000, 24.6%), purple (Marketing: $1,800,000, 13.8%), cyan (Operations: $2,100,000,
+    16.2%), and brown (Research: $1,400,000, 10.8%). Data labels with name, dollar
+    value, and percentage are positioned outside each slice with connector lines.
+    A vertical legend on the right lists all departments with color indicators. The
+    pie chart is well-sized and centered, using approximately 50-60% of the canvas
+    area.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, labels, and legend are clearly readable at full size. Slightly
+          smaller than ideal for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels are well-positioned with adequate
+          spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are appropriately sized; good use of canvas space
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, cyan, brown)
+          - no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout; legend slightly far from the chart
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-formatted and placed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie chart with drilldown capability
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to slice sizes
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has drilldown series, breadcrumbs configured, percentage/value labels.
+          However, the static PNG cannot show the animated transitions or click interactivity.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend matches data correctly
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title format correct but uses em-dash in subtitle instead of standard
+          hyphen
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 top-level departments with 3-4 sub-departments each in drilldown.
+          Good hierarchy depth but static image only shows top level.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company revenue by department is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue values ($1.4M-$4.5M) are realistic for department budgets
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → config → HTML → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: 'Data is deterministic (hardcoded), so no random seed is needed.
+          Minor: relies on external URL fetch.'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts drilldown API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly, but also saves plot.html which is good
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts drilldown module, breadcrumbs, accessibility
+          features, and interactive configuration
+  verdict: APPROVED
diff --git a/plots/pie-drilldown/metadata/plotly.yaml b/plots/pie-drilldown/metadata/plotly.yaml
index 03fb48eaff..2b05998c49 100644
--- a/plots/pie-drilldown/metadata/plotly.yaml
+++ b/plots/pie-drilldown/metadata/plotly.yaml
@@ -25,3 +25,172 @@ review:
     top-left area
   - Static PNG cannot demonstrate the drilldown functionality - consider adjusting
     breadcrumb position to avoid overlap
+  image_description: 'A donut-style pie chart displaying a company budget breakdown
+    for FY 2024 across four departments. The chart uses a colorblind-safe palette:
+    Engineering (blue, 37.1%), Marketing (purple, 19.8%), Operations (green, 20.5%),
+    and Sales (red-orange, 22.6%). The title "pie-drilldown · plotly · pyplots.ai"
+    appears centered at the top. A breadcrumb indicator "📍 All Departments" is positioned
+    in the top-left corner. Each slice is slightly pulled out with white borders separating
+    them. Labels with department names and percentages are positioned outside the
+    slices using connector lines. The center displays "Company Budget FY 2024". A
+    legend titled "Departments" is placed on the right side. At the bottom, instructional
+    text "👆 Click any slice to drill down into subcategories" guides users on interaction.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title 28pt, labels 20pt, all clearly readable. Minor: annotation
+          text could be slightly larger'
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor overlap between "All Departments" breadcrumb and "Marketing"
+          label at top-left
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices well-sized, donut hole appropriate, slight pull effect
+          enhances visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette (blue, purple, green, red-orange)
+          with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart well-centered, good use of canvas space, legend properly positioned
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed but no grid (expected for pie charts)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie/donut chart with drilldown concept
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to slice sizes
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has breadcrumb, click instruction, animated transitions (in HTML),
+          but static PNG shows only top level. Missing visual indicator (cursor change)
+          shown only in HTML
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to ~100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly matches slice colors and labels
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but lowercase "plotly" (minor)
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical budget data with 4 departments and subcategories
+          defined. Static image only shows top level
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is a perfect real-world scenario for drilldown
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values are realistic ($180K-$850K range), though some subcategory
+          values seem slightly arbitrary
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only json and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct for interactive library)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Plotly-specific features: hover templates, annotations,
+          donut chart with hole, animated transitions via JavaScript injection, HTML
+          export with interactivity'
+  verdict: APPROVED
diff --git a/plots/pie-drilldown/metadata/pygal.yaml b/plots/pie-drilldown/metadata/pygal.yaml
index 7dde2acad9..ef38a89ddb 100644
--- a/plots/pie-drilldown/metadata/pygal.yaml
+++ b/plots/pie-drilldown/metadata/pygal.yaml
@@ -26,3 +26,168 @@ review:
     pyplots.ai format
   - PNG preview shows only dollar values not percentages as specified in spec
   - Slice category labels appear small relative to the large canvas size
+  image_description: 'The plot displays a donut pie chart (ring chart with inner_radius=0.35)
+    showing a company budget breakdown across 4 departments. The chart uses distinct
+    colors: muted blue for Engineering ($450,000), yellow for Marketing ($280,000),
+    coral/orange for Operations ($180,000), and sage green for Human Resources ($90,000).
+    The title "Company Budget · pie-drilldown · pygal · pyplots.ai" appears at the
+    top, with a subtitle "All Departments | Click slice to drill down" at the bottom.
+    A legend at the bottom displays all department names with color-coded boxes. Dollar
+    values are displayed on each slice. The chart occupies a good portion of the 3600×3600
+    square canvas with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and values readable, but slice labels showing category names
+          are quite small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Donut chart is well-sized, slices clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-friendly palette (blue, yellow, coral, green are distinct)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, chart fills canvas well with balanced margins
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom, clean presentation
+    spec_compliance:
+      score: 22
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pie/donut chart type for hierarchical drilldown
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to slice sizes
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has breadcrumb indicator, values shown; click hint present. PNG is
+          static but HTML has full interactivity (-1 for static preview lacking percentage
+          display mentioned in spec)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All categories visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: true
+        comment: Title shows "Company Budget · pie-drilldown · pygal · pyplots.ai"
+          but should be "{spec-id} · {library} · pyplots.ai" format (pie-drilldown
+          · pygal · pyplots.ai)
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows hierarchical data with 4 main categories, each having subcategories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values ($25K-$450K) are plausible for departments
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has helper functions (format_value_with_percent, create_pygal_chart)
+          which are necessary for the drilldown feature
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random values
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Outputs plot.png and plot.html (correct)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses pygal's SVG rendering, xlink for interactivity, custom Style,
+          inner_radius for donut effect, value_formatter, HTML generation with embedded
+          SVG
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/bokeh.yaml b/plots/pie-exploded/metadata/bokeh.yaml
index 5545de012f..7e46ce8c68 100644
--- a/plots/pie-exploded/metadata/bokeh.yaml
+++ b/plots/pie-exploded/metadata/bokeh.yaml
@@ -26,3 +26,170 @@ review:
     the right
   - Only one slice is exploded; spec mentions 1-3 slices could be exploded for demonstration
   - Missing HoverTool which would enhance the interactive HTML output with tooltips
+  image_description: 'The plot displays an exploded pie chart with the title "pie-exploded
+    · bokeh · pyplots.ai" at the top. The chart shows 5 market share segments: "Tech
+    Giant" (42.0%) in Python blue (#306998) which is visually exploded/separated from
+    the center, "Startup A" (18.0%) in yellow, "Startup B" (15.0%) in green, "Others"
+    (14.0%) in purple, and "Legacy Corp" (11.0%) in orange. Each slice has a white
+    percentage label positioned inside. The legend is placed on the right side showing
+    category names with their percentages. The pie chart uses a 1:1 square format
+    (3600x3600) with clean white background and no axes/grid (appropriate for pie
+    charts). The explosion effect is clearly visible on the largest "Tech Giant" slice
+    which is offset from the center.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 36pt, labels at 24pt, legend at 28pt - all clearly readable,
+          though legend text could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well positioned inside slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized, filling good portion of canvas, explosion
+          effect clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with distinct hues (blue, yellow, green,
+          purple, orange)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas, pie is well-centered, legend placement
+          is reasonable but slightly far from chart
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate), legend well formatted with transparent background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has explosion effect, percentage labels, distinct colors, legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match categories with accurate percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "pie-exploded · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows explosion effect well on largest slice; could demonstrate multiple
+          exploded slices per spec note
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share scenario is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable (42%, 18%, 15%, 14%, 11%), though slightly
+          generic distribution
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (good), but no np.random.seed even though
+          numpy is imported
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's wedge glyph, ColumnDataSource for labels, LabelSet;
+          generates HTML for interactivity. Could leverage more advanced Bokeh features
+          like HoverTool for tooltips.
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/highcharts.yaml b/plots/pie-exploded/metadata/highcharts.yaml
index 6f7f3e5ade..8b3711599c 100644
--- a/plots/pie-exploded/metadata/highcharts.yaml
+++ b/plots/pie-exploded/metadata/highcharts.yaml
@@ -27,3 +27,171 @@ review:
     on right side'
   - Data variation could be more dramatic to better showcase the explosion feature
     contrast
+  image_description: 'The plot shows an exploded pie chart titled "pie-exploded ·
+    highcharts · pyplots.ai" with a subtitle "Market Share by Company". The chart
+    displays 6 segments representing different tech companies: TechCorp (35.0%, blue,
+    exploded), DataSoft (25.0%, yellow, exploded), CloudNet (18.0%, purple), InnoSys
+    (12.0%, cyan), WebScale (7.0%, brown), and CoreLogic (3.0%, pink). The two largest
+    segments (TechCorp and DataSoft) are visually separated/exploded from the main
+    pie to emphasize the market leaders. Data labels show company names with percentage
+    values. A legend is positioned on the right side with all category names and colored
+    indicators. The chart uses a white background with clean, professional styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (72px), data labels are clearly readable
+          (36px), legend text is appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels are well-positioned around the pie
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized (65% of chart area), explosion offset is
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with distinct blue, yellow, purple,
+          cyan, brown, pink colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout but pie is slightly off-center due to legend
+          placement on right; minor imbalance
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right side, no grid needed for pie chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to pie slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has explosion effect, percentage labels, distinct colors, legend
+          for category identification
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows all 6 categories with correct colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "pie-exploded · highcharts · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows explosion on 2 slices (leader and runner-up), varied slice
+          sizes; could show more dramatic size variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by company is a perfect real-world scenario for exploded
+          pie charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible (35%, 25%, 18%, 12%, 7%, 3% = 100%); could have
+          more dramatic variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Highcharts-specific sliced property for explosion, slicedOffset
+          for explosion distance, allowPointSelect for interactivity, proper series
+          configuration
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/letsplot.yaml b/plots/pie-exploded/metadata/letsplot.yaml
index fb50f274f3..71376899f7 100644
--- a/plots/pie-exploded/metadata/letsplot.yaml
+++ b/plots/pie-exploded/metadata/letsplot.yaml
@@ -23,3 +23,162 @@ review:
   weaknesses:
   - Legend text could be slightly larger for better readability at full resolution
   - Pie chart is slightly left-of-center due to legend placement
+  image_description: 'The plot shows an exploded pie chart with 6 department budget
+    segments on a white background. The title "pie-exploded · letsplot · pyplots.ai"
+    appears at the top center. The pie chart displays: R&D (green, 32.0%) and Sales
+    (orange, 20.0%) as exploded/separated slices, with Marketing (blue, 18.0%), Operations
+    (yellow, 15.0%), HR (purple, 8.0%), and IT (light blue, 7.0%) as non-exploded
+    slices. Each slice has a white percentage label. A legend titled "Department"
+    appears on the right side listing all categories with colored dots. The chart
+    uses distinct, colorblind-friendly colors with white stroke separating slices.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and percentage labels are clearly readable; legend text is
+          slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized, explosion effect is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with distinct hues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Pie uses good canvas space, though slightly off-center to the left
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right side, no grid needed for pie
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has explosion effect, percentage labels, distinct colors, legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pie-exploded · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple exploded slices (R&D and Sales) with different explosion
+          amounts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, values are realistic for budgets
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses deterministic data but no random seed needed (no random data)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_pie with explode aesthetic and layer_labels, but could
+          leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/matplotlib.yaml b/plots/pie-exploded/metadata/matplotlib.yaml
index c286d50f77..b298ba12b6 100644
--- a/plots/pie-exploded/metadata/matplotlib.yaml
+++ b/plots/pie-exploded/metadata/matplotlib.yaml
@@ -24,3 +24,162 @@ review:
   - Could demonstrate exploding multiple slices (e.g., 2 slices) to better showcase
     the feature flexibility
   - Minor layout imbalance with legend placement causing slight asymmetry
+  image_description: 'The plot displays an exploded pie chart titled "pie-exploded
+    · matplotlib · pyplots.ai". The chart shows market share distribution across 5
+    companies: TechCorp (35%, dark blue, exploded/separated from center), DataSoft
+    (25%, yellow), CloudNet (18%, green), AIVentures (12%, purple), and Others (10%,
+    orange). The TechCorp slice is visually separated from the pie to emphasize the
+    market leader. All slices have white edge separators and percentage labels in
+    white bold text positioned inside each slice. Category labels are displayed outside
+    each slice in black text. A legend box titled "Companies" is positioned to the
+    right of the pie chart listing all five categories with their corresponding colors.
+    The pie chart uses a square aspect ratio and fills the canvas well.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, percentages at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels and percentages are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized, explosion effect is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, green, purple, orange)
+          with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but legend creates some imbalance to the right;
+          slight excess whitespace at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-positioned and properly formatted with title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Explosion effect, percentage labels, distinct colors, and legend
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match slice labels exactly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "pie-exploded · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows explosion feature well, but only one slice is exploded (spec
+          suggests 1-3 slices could be exploded to show more variation)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech market share is a plausible, realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Market share percentages are realistic (35%, 25%, 18%, 12%, 10% =
+          100%)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic so this is acceptable (partial
+          deduction)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/plotly.yaml b/plots/pie-exploded/metadata/plotly.yaml
index af04bb88be..4abe461111 100644
--- a/plots/pie-exploded/metadata/plotly.yaml
+++ b/plots/pie-exploded/metadata/plotly.yaml
@@ -25,3 +25,174 @@ review:
     centered better
   - Market share values could show more dramatic contrast to better demonstrate the
     exploded emphasis effect
+  image_description: The plot displays a pie chart showing market share analysis with
+    6 segments. Company A (35%, yellow) is the largest slice and is visibly exploded/separated
+    from the center. Company B (22%, dark blue), Company C (18%, teal), Company D
+    (12%, green), and Company E (8%, purple) form the main body. "Others" (5%, gray)
+    is also slightly exploded. Each slice displays the company name and percentage
+    outside the slice with connector lines. A vertical legend appears on the right
+    side. The title "Market Share Analysis · pie-exploded · plotly · pyplots.ai" is
+    centered at the top. White borders separate the slices, and the chart uses a clean
+    white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (~32pt), slice labels are readable (~22pt),
+          legend text is appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All labels are positioned outside slices with no overlap, clear spacing
+          between elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized, explosion effect is clearly visible, proportions
+          are clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast; yellow, blue, teal, green,
+          purple, and gray are distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of square canvas for pie chart, but some empty space at
+          bottom; pie could be slightly larger or more centered
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed on right side with semi-transparent background
+          and border
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Explosion distance controllable per slice, percentage labels present,
+          distinct colors, legend included, 1-2 slices exploded maintaining visual
+          clarity
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 6 categories visible with correct proportions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match slice categories exactly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows correct format: "Market Share Analysis · pie-exploded
+          · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows exploded slices (market leader + smallest), varying segment
+          sizes; could show more dramatic size contrast
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share analysis is a perfect real-world use case for exploded
+          pie charts, company naming is sensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages add to 100%, values are realistic; market leader at 35%
+          is plausible but modest
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed as data is deterministic, but data is hardcoded
+          (acceptable for this case - giving 3/3 on reconsideration)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only `plotly.graph_objects` imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Plotly's `go.Pie` with `pull` parameter for explosion, hover
+          templates for interactivity, saves both PNG and interactive HTML
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/plotnine.yaml b/plots/pie-exploded/metadata/plotnine.yaml
index 2fed21cb45..8b643769a3 100644
--- a/plots/pie-exploded/metadata/plotnine.yaml
+++ b/plots/pie-exploded/metadata/plotnine.yaml
@@ -24,3 +24,171 @@ review:
   - Code uses a helper function (create_pie_segment) which violates the KISS principle
     of no functions/classes
   - Red and green segments could pose mild colorblind accessibility issues
+  image_description: 'The plot displays an exploded pie chart showing "Market Share
+    by Company" with 6 segments. The largest segment (TechCorp, 32.0%) is exploded/separated
+    from the center, rendered in Python blue (#306998). Other segments clockwise from
+    TechCorp: DataSoft (24.0%) in yellow, CloudInc (18.0%) in green, NetWorks (12.0%)
+    in red, DevHub (8.0%) in purple, and Others (6.0%) in teal. Each slice has a white
+    percentage label in bold. A custom legend is positioned to the right showing colored
+    squares with company names and percentages. The title "Market Share by Company
+    · pie-exploded · plotnine · pyplots.ai" appears at the top center. Clean white
+    background with no axes or grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend text are clear; percentage labels inside slices
+          are readable but could be slightly larger for smaller slices
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized and clearly visible, explosion effect is
+          noticeable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color variety; red and green segments are distinguishable but
+          could be more colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, pie is well-centered with legend properly
+          placed to the right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Custom legend is clean and well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Explosion effect present, percentage labels on slices, distinct colors,
+          legend included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 6 categories visible, proportions sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Market Share by Company · pie-exploded · plotnine
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows explosion feature well; only one slice exploded (spec suggests
+          1-3 is typical)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share by tech companies is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (32%, 24%, 18%, 12%, 8%, 6% = 100%); percentages
+          are plausible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses a helper function `create_pie_segment()` which violates KISS
+          principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of plotnine's grammar of graphics with geom_polygon
+          to manually construct pie segments; demonstrates advanced plotnine usage
+          for a chart type not natively supported
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/pygal.yaml b/plots/pie-exploded/metadata/pygal.yaml
index 70bfd3ccf7..ebb11a3914 100644
--- a/plots/pie-exploded/metadata/pygal.yaml
+++ b/plots/pie-exploded/metadata/pygal.yaml
@@ -22,3 +22,159 @@ review:
   weaknesses:
   - Code uses a function definition (explode_slices) which deviates from KISS structure,
     though justified by pygal API design requiring a callable for add_xml_filter
+  image_description: 'The plot displays an exploded pie chart titled "Market Share
+    Analysis · pie-exploded · pygal · pyplots.ai". It shows 5 company segments: TechCorp
+    (35.2%, dark blue, exploded outward), DataFlow (22.8%, yellow), CloudBase (18.5%,
+    teal/cyan, slightly exploded), NetSys (14.3%, coral/salmon), and Others (9.2%,
+    gray). The pie chart fills approximately 60-70% of the square canvas. Percentage
+    labels are displayed within each slice. A legend at the bottom shows all five
+    categories in a horizontal row with colored squares. The colors are distinct and
+    visually appealing. The TechCorp slice (market leader) is clearly separated/exploded
+    from the center, and CloudBase has a smaller explosion effect.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, percentage labels, and legend all clearly readable at the
+          large canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well positioned within slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized, explosion effect clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Good color contrast, no red-green only differences, colorblind-friendly
+          palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Pie chart well-centered, good use of canvas space, legend properly
+          positioned at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct exploded pie chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to slice proportions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Explosion effect present, percentage labels shown, distinct colors,
+          legend included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Market Share Analysis · pie-exploded · pygal
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows explosion on 2 slices (leader and third place), demonstrating
+          controllable explosion distance. Could show slightly more variation in explosion
+          distances
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share analysis is a realistic, comprehensible scenario with
+          plausible company names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, realistic market share distribution. Values
+          are realistic for tech market share
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (pygal, math, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses pygal's add_xml_filter for custom SVG manipulation, custom Style,
+          value_formatter, legend_at_bottom_columns - demonstrates advanced pygal
+          capabilities
+  verdict: APPROVED
diff --git a/plots/pie-exploded/metadata/seaborn.yaml b/plots/pie-exploded/metadata/seaborn.yaml
index d18c911b6e..55f330007f 100644
--- a/plots/pie-exploded/metadata/seaborn.yaml
+++ b/plots/pie-exploded/metadata/seaborn.yaml
@@ -27,3 +27,164 @@ review:
     could improve canvas utilization
   - The seaborn library is primarily used for styling rather than core plotting (acceptable
     since seaborn has no native pie chart)
+  image_description: 'The plot shows an exploded pie chart visualizing market share
+    analysis. The chart displays 5 company segments: TechCorp (35.2%, dark blue, exploded
+    outward from the center), DataSoft (22.8%, yellow), CloudNet (18.5%, teal), AIVentures
+    (14.3%, coral/salmon), and CyberSys (9.2%, light mint green). All slices have
+    white percentage labels with bold text inside them. The TechCorp slice is visibly
+    separated from the center, emphasizing it as the market leader. A legend on the
+    right side shows company names with their percentages. The title reads ''Market
+    Share Analysis · pie-exploded · seaborn · pyplots.ai'' in bold at the top. The
+    chart uses white edge lines between slices and has a clean white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, legend, and percentage labels are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly positioned within
+          their slices
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Pie slices are well-sized and proportioned, wedges clearly visible
+          with white edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, though some shades could be improved for full
+          colorblind safety
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, pie chart is well-sized, minor imbalance
+          with legend positioning creating some empty space at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-styled with shadow and positioned appropriately
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct chart type (exploded pie chart)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped to slices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: exploded slice, percentage labels, distinct
+          colors, legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, percentages sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: ''Market Share Analysis · pie-exploded · seaborn
+          · pyplots.ai'''
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows exploded slice for leader, but could demonstrate multiple exploded
+          slices as spec allows 1-3
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share analysis is a perfect real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic percentages that sum to 100%; 5 categories is
+          within optimal range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no classes/functions
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but no random seed (though not strictly needed
+          here since no random data)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and seaborn imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/altair.yaml b/plots/point-basic/metadata/altair.yaml
index ce03ae0158..21bbdd8480 100644
--- a/plots/point-basic/metadata/altair.yaml
+++ b/plots/point-basic/metadata/altair.yaml
@@ -25,3 +25,176 @@ review:
   - Axis label Effect Size lacks units (could be Effect Size Cohens d or similar)
   - Library features underutilized - could add selection/highlighting for interactivity
   - The seed(42) call is unnecessary since no random data generation actually occurs
+  image_description: The plot displays a horizontal point estimate chart with 6 groups
+    (Treatment A, B, C, D, Control, Placebo) on the y-axis and "Effect Size" on the
+    x-axis (ranging from -3.0 to 5.0). Each group shows a blue filled circle marker
+    at its estimate value, with horizontal error bars extending to confidence interval
+    bounds. The error bars have vertical caps at both endpoints. A dashed gray vertical
+    reference line is positioned at x=0 (null hypothesis). The color scheme uses a
+    consistent blue (#306998) for all data elements. The title "point-basic · altair
+    · pyplots.ai" appears centered at the top. Text is clearly readable, and the layout
+    effectively uses the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Point markers (size=400) and error bars (strokeWidth=3) are appropriately
+          sized for 6 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, high contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight asymmetry with data clustered on
+          right side but this reflects the data
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Effect Size" (descriptive) but no units specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (clean look), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, estimates on X-axis, properly horizontal orientation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Points, confidence intervals, error bar caps, reference line at zero
+          - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis scale (-3 to 5) shows all data points and intervals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, tooltips included for interactivity
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "point-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive effects, negative effect (Treatment D), control groups,
+          varying CI widths - good variety but could show more extreme differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial treatment effects is a perfect, neutral real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes in reasonable range, though the specific values are
+          somewhat arbitrary
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → layered chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) (though not actually used for random generation
+          in this case)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative layering system with mark_point, mark_rule,
+          mark_tick, but doesn't use interactive features like selections or dynamic
+          tooltips beyond basic
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/bokeh.yaml b/plots/point-basic/metadata/bokeh.yaml
index 17961d23b7..80ad9d69e6 100644
--- a/plots/point-basic/metadata/bokeh.yaml
+++ b/plots/point-basic/metadata/bokeh.yaml
@@ -24,3 +24,177 @@ review:
   - Could benefit from HoverTool to show exact values on mouse-over (Bokeh's key interactive
     feature)
   - Axis labels lack units (e.g., "Effect Size (Cohen's d)" would be more informative)
+  image_description: 'The plot shows a horizontal point estimate plot with 6 treatment
+    groups (Treatment A through E, plus Control) on the y-axis and Effect Size on
+    the x-axis. Each group has a yellow circular point marker with a blue outline
+    representing the estimate, connected to horizontal blue error bars (confidence
+    intervals) with T-shaped caps at each end. A vertical dashed gray reference line
+    is drawn at x=0 (null hypothesis). The title "point-basic · bokeh · pyplots.ai"
+    appears at the top left. Treatment groups show varying effect sizes: Treatment
+    A (~2.5), Treatment B (~1.8), Treatment C (~3.2), Treatment D (~-0.5, only one
+    crossing zero), Treatment E (~1.2), and Control (at 0). The confidence interval
+    widths vary appropriately across groups.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size. Font sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels are well-spaced vertically.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Point markers are large and clearly visible; error bars have good
+          line width and distinct T-caps.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe; good contrast against
+          white background.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot is well-centered with balanced margins.
+          Minor deduction as there's slightly more whitespace than ideal.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Effect Size" and "Treatment Group" are descriptive but lack units.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha=0.3; no legend needed
+          for this plot type.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, estimates/intervals on X-axis (horizontal orientation
+          as spec recommends).
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has point estimates, error bars with caps, reference line at zero.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points and intervals are fully visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (single series).
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "point-basic · bokeh · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows positive and negative estimates, varying CI widths, reference
+          line crossing. Minor deduction: only one negative estimate.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Treatment effect comparison is a classic, neutral use case for point
+          estimates.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes in reasonable range (-0.5 to 3.2); CI widths are plausible.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Bokeh-specific features like Whisker with TeeHead and Span for
+          reference line, but doesn't leverage Bokeh's interactive capabilities (HoverTool,
+          etc.) which are its main strength.
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/highcharts.yaml b/plots/point-basic/metadata/highcharts.yaml
index e4523dae6c..09f7b89333 100644
--- a/plots/point-basic/metadata/highcharts.yaml
+++ b/plots/point-basic/metadata/highcharts.yaml
@@ -25,3 +25,178 @@ review:
   weaknesses:
   - Confidence intervals lack endpoint caps as mentioned in specification notes
   - Legend could be positioned closer to the plot area for better visual cohesion
+  image_description: The plot displays a horizontal point estimate chart showing department
+    performance scores with 95% confidence intervals. Eight departments (Marketing,
+    Engineering, Sales, Operations, Finance, HR, Research, Customer Support) are listed
+    on the left y-axis. Each department has a yellow circular point marker representing
+    the point estimate, with blue horizontal bars extending to show the confidence
+    interval range. A vertical dashed gray reference line is positioned at score 75.
+    The x-axis shows "Performance Score" ranging from 50 to 100. The title "point-basic
+    · highcharts · pyplots.ai" appears at the top with a subtitle explaining the data
+    context. The layout uses white background with subtle gray grid lines. Engineering
+    and Research show the highest scores (~85-88), while HR and Sales show lower scores
+    (~66-68).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title at 48px, axis labels at 36px, tick
+          labels at 24-28px'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, horizontal layout prevents label collisions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Point markers (radius 16) and CI bars clearly visible, well-sized
+          for 8 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent blue (#306998) and yellow (#FFD43B) combination, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with appropriate margins, slight excess whitespace
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Department" and "Performance Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (good), but legend is at bottom with extra vertical
+          space separating it from plot
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, estimates on x-axis, CI bars correct
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has horizontal orientation, distinct markers, reference line at 75;
+          missing endpoint caps on error bars as noted in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axis range 50-100 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "95% CI" and "Point Estimate"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "point-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in estimates and different CI widths (asymmetric
+          intervals); Research has narrow CI, Finance has wide CI
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Department performance scores are a plausible business scenario, neutral
+          topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 66-88 with CIs ranging ~±3-8 points are realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → chart config → series
+          → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" (correct) but also creates plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of inverted chart, columnrange series for CIs, plotLines
+          for reference line; could leverage more Highcharts-specific features like
+          data labels or tooltips in HTML version
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/letsplot.yaml b/plots/point-basic/metadata/letsplot.yaml
index 5cfe36f46a..440ba455f5 100644
--- a/plots/point-basic/metadata/letsplot.yaml
+++ b/plots/point-basic/metadata/letsplot.yaml
@@ -22,3 +22,174 @@ review:
   - All effect sizes are positive; including at least one negative or zero-crossing
     effect would better demonstrate the plot utility
   - Does not include error bar caps at endpoints (spec suggests caps)
+  image_description: The plot displays a horizontal point estimate plot with confidence
+    intervals. It shows 5 treatment groups (Treatment A, Treatment B, Treatment C,
+    Control, Placebo) on the y-axis and Effect Size (95% CI) on the x-axis. Each group
+    has a blue point (#306998) representing the estimate with horizontal lines extending
+    to show confidence intervals. A vertical dashed gray reference line is drawn at
+    x=0. The title "point-basic · lets-plot · pyplots.ai" appears at the top. The
+    plot uses a minimal theme with a clean white background, subtle horizontal grid
+    lines, and well-sized text throughout.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis titles at 20pt, axis text at 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, horizontal orientation prevents label overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points and confidence interval lines are clearly visible with good
+          sizing (size=1.5)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, balanced whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Effect Size (95% CI)" has units, but "Treatment Group" is descriptive
+          only'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Minimal theme with subtle grid, no legend needed (single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals using geom_pointrange
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis (after coord_flip), estimates and CI bounds
+          correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Points, confidence intervals, reference line at zero, horizontal
+          orientation - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points and intervals fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "point-basic · lets-plot · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varying CI widths and different effect sizes, but no negative
+          effects shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Research study with treatment groups is a neutral, realistic scientific
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes 1.8-5.8 are reasonable, but all positive which limits
+          demonstration
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with scale=3
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_pointrange, coord_flip, theme_minimal,
+          but doesn't leverage lets-plot unique features like tooltips or interactivity
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/matplotlib.yaml b/plots/point-basic/metadata/matplotlib.yaml
index 9034069c97..08e6735f16 100644
--- a/plots/point-basic/metadata/matplotlib.yaml
+++ b/plots/point-basic/metadata/matplotlib.yaml
@@ -21,3 +21,169 @@ review:
   - Clean, professional appearance suitable for publication
   weaknesses:
   - Could add alpha transparency to markers for even better visual distinction (minor)
+  image_description: The plot displays a horizontal point estimate chart showing customer
+    satisfaction scores (1-10 scale) across 7 departments. Each department has a blue
+    circular marker (#306998) with white edge representing the point estimate, and
+    horizontal error bars with caps showing 95% confidence intervals. A yellow/gold
+    dashed vertical reference line indicates the overall mean (6.8) with a text annotation.
+    The title "point-basic · matplotlib · pyplots.ai" is clearly displayed at the
+    top. The layout is clean with subtle gray grid lines on the x-axis and well-proportioned
+    margins.
+  criteria_checklist:
+    visual_quality:
+      score: 40
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers clearly visible with white edge, error bars well-sized with
+          caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score (1-10)" includes units, "Department" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.3), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation with estimates on x-axis, categories on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct markers, error bars with caps, reference line present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Appropriate axis limits for data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Reference line labeled correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation in both estimates and CI widths (narrow for Shipping,
+          wide for Billing)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction by department is a realistic, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores 5.4-8.3 on 1-10 scale are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) present
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of errorbar with asymmetric errors, axvline, markeredgecolor/markeredgewidth
+          styling
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/plotly.yaml b/plots/point-basic/metadata/plotly.yaml
index 87fb849dd1..f09930aa5b 100644
--- a/plots/point-basic/metadata/plotly.yaml
+++ b/plots/point-basic/metadata/plotly.yaml
@@ -25,3 +25,181 @@ review:
   - All treatment effects are positive; including one negative effect would better
     demonstrate full capability
   - Grid opacity at 0.1 is very subtle; 0.2-0.3 would improve readability
+  image_description: The plot displays a horizontal point estimate plot with 6 treatment
+    groups (Control, Treatment A through Treatment E) arranged vertically on the y-axis.
+    The x-axis shows "Effect Size (units)" ranging from approximately -1 to 5.5. Each
+    group has a blue circular marker (#306998) representing the point estimate, with
+    horizontal error bars extending to show the 95% confidence intervals. The error
+    bars have visible caps at the endpoints. A dashed yellow/gold vertical reference
+    line at x=0 is labeled "Null" at the top, representing the null hypothesis. The
+    Control group is centered at 0, while all treatment groups show positive effects
+    of varying magnitudes (Treatment D highest at ~4.2, Treatment C lowest at ~1.5).
+    The legend "Estimate ± 95% CI" is positioned in the bottom right corner with a
+    semi-transparent white background. The plot uses the plotly_white template with
+    subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18-20pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at 18 with good visibility, error bars thickness=3
+          with width=10 caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with yellow reference line, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, margins well configured, slight excess
+          space on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Effect Size (units)" and "Treatment Group" are descriptive with
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but legend placement in bottom-right
+          corner is suboptimal for this horizontal layout - would be better positioned
+          outside the plot area or top-right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, estimates on X-axis (horizontal orientation
+          per spec)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has point estimates, confidence intervals, reference line at zero,
+          error bar caps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points and confidence intervals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend label "Estimate ± 95% CI" accurately describes the data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "point-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying CI widths, positive and zero effects, but all treatments
+          positive (no negative treatment effect shown)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial treatment effects scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes 0-4.2 with CIs 0.8-1.5 are plausible, though units are
+          generic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with error_x and add_vline which are standard Plotly
+          features, but doesn't leverage Plotly's interactive capabilities in the
+          static output (hover templates, annotations with arrows, etc.)
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/plotnine.yaml b/plots/point-basic/metadata/plotnine.yaml
index 7447f75566..c80834abdf 100644
--- a/plots/point-basic/metadata/plotnine.yaml
+++ b/plots/point-basic/metadata/plotnine.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Grid styling uses non-standard alpha parameter in element_line (may cause warnings
     in some plotnine versions)
+  image_description: The plot displays a horizontal point estimate chart showing product
+    satisfaction scores across 8 categories. Each category (Product Quality, Product
+    Variety, Customer Service, Packaging, Return Process, Website Usability, Price
+    Value, Delivery Speed) is shown on the y-axis with blue circular points representing
+    the mean estimate and horizontal error bars showing confidence intervals. A vertical
+    dashed gray reference line is positioned at score 7.0. The x-axis shows "Satisfaction
+    Score (1-10)" ranging from approximately 5.3 to 8.7. The title reads "point-basic
+    · plotnine · pyplots.ai" in bold. The plot uses a clean minimal theme with subtle
+    gray gridlines. Categories are sorted by their estimate values from lowest (Delivery
+    Speed) to highest (Product Quality).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis titles 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points are clearly visible (size=5), error bars have good thickness
+          (size=1.5) and caps (height=0.3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight margin imbalance on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with scale indication: "Satisfaction Score (1-10)"
+          and "Category"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but no legend needed for this single-series
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, estimates on X-axis (horizontal orientation
+          as spec suggests)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has point estimates, confidence intervals, reference line at 7.0,
+          error bar caps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, x-axis appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "point-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying confidence interval widths and different estimate positions;
+          could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product satisfaction survey is a plausible, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1-10 satisfaction scale is realistic, though all values cluster between
+          5.3-8.7
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of ggplot2 grammar: geom_errorbarh, geom_point, geom_vline,
+          aes mapping, theme customization, categorical ordering'
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/pygal.yaml b/plots/point-basic/metadata/pygal.yaml
index 864cf720b5..57d2972695 100644
--- a/plots/point-basic/metadata/pygal.yaml
+++ b/plots/point-basic/metadata/pygal.yaml
@@ -24,3 +24,176 @@ review:
   - Confidence interval lines lack caps/endpoints as suggested in the specification
     notes
   - X-axis label Effect Size could include units or be more descriptive
+  image_description: 'The plot displays a horizontal point estimate visualization
+    with five categories (Treatment A, B, C, D, and Control) arranged vertically on
+    the y-axis. Each category shows a yellow/gold circular point marker representing
+    the point estimate, with blue horizontal lines extending on both sides to indicate
+    95% confidence intervals. A vertical gray reference line is positioned at x=0
+    (the null hypothesis line). The x-axis is labeled "Effect Size" with values ranging
+    from -1 to 4. The title "point-basic · pygal · pyplots.ai" appears at the top
+    in a clean monospace font. A legend at the bottom identifies the three visual
+    elements: Reference (x=0), 95% CI, and Point Estimate. The background is white
+    with subtle dotted horizontal guide lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and category labels are clearly readable; font
+          sizes are appropriate for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; categories well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Point markers are visible and well-sized; CI lines are clear; could
+          be slightly thicker for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue CI lines and yellow points provide excellent contrast; colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot is well-centered with appropriate
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Effect Size" but no units specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle dotted grid lines; legend well-placed at bottom; grid could
+          be slightly less prominent
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, effect sizes on X-axis, properly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has point estimates, CI lines, reference line; spec suggests error
+          bars should have caps at endpoints (missing)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points and intervals fully visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "point-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in effect sizes, different CI widths, includes control
+          group at zero; Treatment D CI crosses zero showing non-significance
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial treatment effects scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Effect sizes are plausible; CI ranges are reasonable though some
+          are quite wide
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart with custom styling, SVG-based rendering; could
+          leverage more pygal-specific features like tooltips
+  verdict: APPROVED
diff --git a/plots/point-basic/metadata/seaborn.yaml b/plots/point-basic/metadata/seaborn.yaml
index bd6f475c01..0ef34f6796 100644
--- a/plots/point-basic/metadata/seaborn.yaml
+++ b/plots/point-basic/metadata/seaborn.yaml
@@ -24,3 +24,154 @@ review:
     confidence interval features
   - All confidence intervals are symmetric around estimates; real-world CIs are often
     asymmetric
+  image_description: The plot displays a horizontal point estimate chart showing satisfaction
+    scores (scale 1-10) for six products (A through F). Each product is represented
+    by a blue circular marker (#306998) with horizontal error bars extending to show
+    confidence intervals. The error bars have clear caps at endpoints. A vertical
+    dashed yellow/gold line indicates the overall mean (6.9). The title follows the
+    correct format "point-basic · seaborn · pyplots.ai". X-axis is labeled "Satisfaction
+    Score (1-10)" and Y-axis is labeled "Product". A legend in the lower right explains
+    the overall mean reference line. The plot has a clean white background with subtle
+    vertical grid lines (alpha ~0.3). The layout is well-balanced with the chart filling
+    the canvas appropriately.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points clearly visible with appropriate marker size (15), error bars
+          thick (3px) with visible caps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score (1-10)" includes units/scale, "Product" is descriptive'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct point estimate plot with confidence intervals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, estimates on X-axis (horizontal orientation
+          as spec recommends)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Points visible, confidence intervals with caps, reference line included
+          as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis 3-10 shows all data with context
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes overall mean line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "point-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying confidence interval widths (0.5 to 1.5), points above/below
+          mean, but all CIs are symmetric
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product satisfaction ratings is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Satisfaction scores 5.9-8.1 on 1-10 scale are realistic; could show
+          wider range of estimates
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/altair.yaml b/plots/polar-bar/metadata/altair.yaml
index 43c729fe99..5202ac5e27 100644
--- a/plots/polar-bar/metadata/altair.yaml
+++ b/plots/polar-bar/metadata/altair.yaml
@@ -27,3 +27,178 @@ review:
   - Scale factor 4.5 produces approximately 4050x4050 pixels which does not match
     the standard 3600x3600 or 4800x2700 target sizes
   - Could demonstrate stacked bars for wind speed ranges as mentioned in spec notes
+  image_description: The plot displays a polar bar chart (wind rose) with 8 wedge-shaped
+    bars radiating from a central point. Each bar represents a compass direction (N,
+    NE, E, SE, S, SW, W, NW) with direction labels positioned around the perimeter
+    in dark blue (#306998). The bars use a sequential blue color scheme from light
+    blue (low frequency ~6-8) to dark blue (high frequency ~20-22). The W (West) direction
+    has the longest/darkest bar indicating highest frequency, followed by SW and NW,
+    showing prevailing westerlies pattern. The title "polar-bar · altair · pyplots.ai"
+    is displayed at the top in dark gray. A legend on the right shows "Frequency"
+    with a gradient scale from approximately 8 to 22. Bars have white stroke separators.
+    The chart uses a square aspect ratio (900x900) and fills the canvas well.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, direction labels at 20pt bold, legend labels at 16-18pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, direction labels well-positioned outside the
+          chart
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar wedges are appropriately sized with good separation via white
+          strokes
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential blue scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with square 900x900, chart is well-centered
+          but legend placement creates slight asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels present (polar charts typically don't have traditional
+          axes, but legend title "Frequency (days)" provides context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right with clear gradient, no distracting grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar chart / wind rose implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction correctly mapped to angle, frequency to radius
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: bars radiating from center, 8 compass
+          directions, color encoding magnitude'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, bars extend proportionally from center
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Frequency (days)" with accurate color scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "polar-bar · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across directions with prevailing westerlies pattern;
+          could include stacking for wind speed ranges to show full wind rose capability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind frequency data is realistic and neutral scientific context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 6-22 days are realistic for wind frequency, though could benefit
+          from clearer unit context
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair 5.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly, but scale_factor=4.5
+          produces non-standard resolution
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_arc effectively for polar bars, includes tooltips and interactive()
+          for interactivity, but could leverage more Altair features like selections
+          or layered annotations
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/bokeh.yaml b/plots/polar-bar/metadata/bokeh.yaml
index 2189fdacb3..b8405bd269 100644
--- a/plots/polar-bar/metadata/bokeh.yaml
+++ b/plots/polar-bar/metadata/bokeh.yaml
@@ -23,3 +23,175 @@ review:
   - Scale labels show percent symbol but data represents raw frequency counts, which
     is semantically inconsistent
   - No legend or annotation explaining what the colors and values represent
+  image_description: The plot displays a polar bar chart (wind rose) with 8 compass
+    directions (N, NE, E, SE, S, SW, W, NW) arranged in a circle. The bars are rendered
+    as wedges radiating from the center, with the W (West) direction having the longest
+    bar extending to approximately 75% of the radius, followed by SW (Southwest).
+    The bars use a Blues8 color palette where darker blues indicate higher frequencies.
+    Three concentric dashed reference circles are visible at 7%, 14%, and 22% intervals
+    with subtle gray labeling. Direction labels are displayed in bold blue (#306998)
+    text around the outer perimeter. The title "polar-bar · bokeh · pyplots.ai" appears
+    at the top in Python blue. The background is a subtle off-white (#fafafa), and
+    the overall layout is clean and well-balanced in a square 3600×3600 format.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, direction labels at 32pt, scale labels at 20pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well-spaced around perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good alpha (0.9) and distinct borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues8 sequential palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas, plot fills ~65% of area, minor whitespace
+          at edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No traditional axis labels (acceptable for polar plot, but no legend
+          explaining what values represent)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed reference circles, no legend needed for this simple
+          visualization
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar/wind rose chart type using wedges
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Directions correctly mapped to angles, frequencies to bar lengths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: bars radiating outward, 8 compass directions,
+          color encoding magnitude'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the plot area
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Direction labels accurate, scale labels correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "polar-bar · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying frequencies across directions with clear pattern (prevailing
+          W/SW winds), could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind frequency data is a classic, neutral meteorological scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible wind frequencies, though labeling as "%" is
+          slightly misleading for raw counts
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Bokeh's wedge glyph, ColumnDataSource, Label annotations,
+          and figure customization
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/highcharts.yaml b/plots/polar-bar/metadata/highcharts.yaml
index 013b3f62d0..74042d9f7f 100644
--- a/plots/polar-bar/metadata/highcharts.yaml
+++ b/plots/polar-bar/metadata/highcharts.yaml
@@ -23,3 +23,180 @@ review:
   - Y-axis label says Frequency (%) but data values appear to be raw counts, not percentages
   - Could leverage more Highcharts interactive features like data labels on segments
     or custom tooltips
+  image_description: 'The plot displays a polar bar chart (wind rose) on a white background.
+    The title "polar-bar · highcharts · pyplots.ai" appears at the top with subtitle
+    "Wind Speed Distribution by Direction". Eight compass directions (N, NE, E, SE,
+    S, SW, W, NW) are labeled around the perimeter in bold text. Stacked bars radiate
+    outward from the center, with four color-coded categories: dark blue (Calm <1
+    mph), bright yellow (Light 1-10 mph), purple (Moderate 10-20 mph), and cyan (Strong
+    >20 mph). The radial axis shows frequency values from 0 to 40, with concentric
+    grid circles. A legend is positioned on the right side. The data shows a realistic
+    prevailing westerly wind pattern with W, SW, and NW directions having the highest
+    frequencies.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, direction labels, and legend are all clearly readable.
+          Radial axis labels are slightly small but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the chart.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stacked bar segments are well-sized and clearly distinguishable.
+          Good use of white borders between segments.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue, yellow, purple, cyan) - avoids red-green
+          combination.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Chart is well-positioned to the left with legend on right. Good use
+          of canvas space, though slightly more centered would be ideal.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Frequency (%)" label but the data appears to be counts,
+          not percentages. Minor inconsistency.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. Legend is well-placed but slightly
+          far from the chart.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar/wind rose chart type.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angular categories correctly map to compass directions, radial values
+          correctly map to frequency/magnitude.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: bars radiating outward, stacked for multiple
+          categories (speed ranges), 8 compass directions, color encoding.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data is visible within the chart range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all four wind speed categories.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "polar-bar · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows stacking, multiple categories, directional variation. Could
+          include more dramatic variation between directions.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind data with prevailing westerlies is a scientifically accurate
+          and neutral meteorological scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency values are reasonable (0-40 range). Label says "%" but
+          values appear to be counts.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Highcharts polar column chart correctly, but doesn't leverage
+          more advanced features like tooltips, animation, or data labels. The interactive
+          HTML export is good.
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/letsplot.yaml b/plots/polar-bar/metadata/letsplot.yaml
index 0b956d7ce1..923669f164 100644
--- a/plots/polar-bar/metadata/letsplot.yaml
+++ b/plots/polar-bar/metadata/letsplot.yaml
@@ -22,3 +22,175 @@ review:
   - Y-axis label shows Frequency (%) but data appears to be absolute counts not percentages
   - Direction labels (NE, SE) positioned outside the plot area - could be more integrated
   - Consider adding radial gridlines for easier magnitude reading
+  image_description: 'The plot displays a polar bar chart (wind rose) with 8 compass
+    directions (N, NE, E, SE, S, SW, W, NW) arranged in a circle. Each direction has
+    a colored bar extending outward from the center, with bar length representing
+    frequency. The colors used are: N (blue), NE (light blue), E (yellow), SE (orange),
+    S (purple), SW (dark purple), W (green), NW (lighter green). The title "polar-bar
+    · letsplot · pyplots.ai" appears at the top. The y-axis is labeled "Frequency
+    (%)" with gridlines at intervals of 2 from 0 to 22. Direction labels are placed
+    around the outside of the chart. The W direction has the longest bar (~22), followed
+    by SW (~18), N (~15), and NW (~14). SE has the shortest bar (~5).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and prominent, axis labels and tick marks are clearly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, direction labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are clearly visible with good alpha (0.85) and white borders
+          for separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 8 distinct colors that are easily distinguishable; no red-green confusion
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas for polar chart, slight issue with direction
+          labels being outside plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Frequency (%)" label but data values appear to be counts,
+          not percentages (minor inconsistency)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but legend is hidden - while direction labels
+          around chart serve this purpose, the label positioning could be better integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar chart / wind rose implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction correctly mapped to angle, frequency to bar height
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: bars radiating outward, 8 compass directions,
+          magnitude represented by bar length'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis extends appropriately to 22
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Direction labels correctly positioned around chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "polar-bar · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across directions with different frequencies; could
+          benefit from stacked categories as mentioned in spec notes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind frequency data is a real, neutral application for wind roses
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 5-22 are plausible for wind frequency data, though "Frequency
+          (%)" label suggests percentages while data looks like counts
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of coord_polar() and grammar of graphics approach, but relatively
+          standard implementation without advanced lets-plot features
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/matplotlib.yaml b/plots/polar-bar/metadata/matplotlib.yaml
index 0de396bdab..a2897809c8 100644
--- a/plots/polar-bar/metadata/matplotlib.yaml
+++ b/plots/polar-bar/metadata/matplotlib.yaml
@@ -28,3 +28,181 @@ review:
     context
   - The figsize of 12x12 may not match the expected 3600x3600 at 300dpi (would be
     3600x3600 but library guidelines suggest 16x9)
+  image_description: 'The polar bar chart displays wind direction frequency data in
+    a classic wind rose format. The chart features 8 compass directions (N, NE, E,
+    SE, S, SW, W, NW) arranged radially with North at the top. Bars extend outward
+    from the center with three stacked color segments: deep blue (#306998) for "Calm
+    (0-5 m/s)", golden yellow (#FFD43B) for "Moderate (5-10 m/s)", and turquoise (#4ECDC4)
+    for "Strong (>10 m/s)". The W direction shows the highest total frequency (~45%),
+    while SE shows the lowest (~12%). Radial grid lines mark 10%, 20%, 30%, and 40%
+    levels. The title "polar-bar · matplotlib · pyplots.ai" appears at the top in
+    bold. A legend with "Wind Speed" title is positioned in the upper right outside
+    the plot area. The overall layout uses a square 1:1 aspect ratio with good canvas
+    utilization.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, direction labels at 18pt bold, radial labels
+          at 14pt, legend at 14-16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, direction labels and radial percentage
+          labels are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths are appropriately sized (0.8 of sector), stacked segments
+          clearly distinguishable with white edge separators
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and turquoise palette is colorblind-safe with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square format for polar chart, legend placement is sensible
+          but creates slight asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Radial labels show "10%", "20%", etc. which is descriptive but compass
+          directions are the categorical axis (no units needed)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), legend is well-placed but title
+          adds some visual weight
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar chart / wind rose implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angles correctly map to 8 compass directions, bar heights to frequencies
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has stacked bars for multiple categories per spec notes, extends
+          outward from center
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis limit (50) accommodates all data with headroom
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels accurately describe the three wind speed categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "polar-bar · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows stacked bars with 3 categories, 8 directions with varying frequencies;
+          could have more dramatic variation between directions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind rose is the canonical application for polar bar charts; meteorological
+          data is a neutral, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind frequencies as percentages are plausible; values are reasonable
+          though higher totals on W/E axis suggest prevailing wind patterns
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually hardcoded arrays
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves to "plot.png" without path prefix (minor; workflow handles
+          this)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib's polar projection, theta_zero_location, theta_direction,
+          rlabel_position - good use of polar-specific features but nothing exceptional
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/plotly.yaml b/plots/polar-bar/metadata/plotly.yaml
index 2e2fefdcab..3084758a86 100644
--- a/plots/polar-bar/metadata/plotly.yaml
+++ b/plots/polar-bar/metadata/plotly.yaml
@@ -25,3 +25,175 @@ review:
     legibility
   - Does not leverage Plotly interactive features like custom hover templates
   - Legend text could benefit from slightly better styling to match overall polish
+  image_description: 'The plot shows a polar bar chart (wind rose) with 8 compass
+    directions (N, NE, E, SE, S, SW, W, NW) arranged clockwise with N at the top.
+    Three stacked bar categories represent wind speed ranges: Light (0-10 km/h) in
+    dark blue (#306998), Moderate (10-20 km/h) in yellow (#FFD43B), and Strong (20+
+    km/h) in teal/cyan (#4ECDC4). The bars radiate outward from the center, with W
+    and SW showing the highest wind frequencies. The plot has concentric circular
+    grid lines with radial axis labels showing "Frequency (%)" ranging from 5 to 35.
+    A legend in the upper right shows the three wind speed categories. The title "polar-bar
+    · plotly · pyplots.ai" appears at the top center. White line separators between
+    bars provide visual clarity. The overall layout is clean with good proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and compass directions are clearly readable; radial axis labels
+          are slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stacked bars are perfectly visible with good sizing and white separators
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast between blue, yellow, and teal; not strictly colorblind-optimized
+          but distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills good portion of canvas with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Has "Frequency (%)" label but no context for what frequencies represent
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend well-placed, but legend background could be
+          more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar chart / wind rose
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction mapped to angle, magnitude to bar height
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stacked bars, 8 compass directions, color
+          encoding for speed ranges'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all three wind speed categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "polar-bar · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows stacked bars, varying magnitudes by direction, W/SW dominant;
+          could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Coastal wind pattern scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequencies are plausible though units as "%" may be slightly misleading
+          for absolute counts
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also outputs plot.html (HTML is expected for
+          plotly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Barpolar effectively with stacking; could leverage hover
+          templates or animations for better plotly-specific features
+  verdict: APPROVED
diff --git a/plots/polar-bar/metadata/plotnine.yaml b/plots/polar-bar/metadata/plotnine.yaml
index 468dc58a93..264dbcaede 100644
--- a/plots/polar-bar/metadata/plotnine.yaml
+++ b/plots/polar-bar/metadata/plotnine.yaml
@@ -23,3 +23,185 @@ review:
   - Figure size is 12x12 instead of the recommended 16x9 landscape (though square
     is acceptable for polar plots)
   - Missing frequency unit in title or labels (e.g., days or %)
+  image_description: The plot displays a polar bar chart (wind rose) with 8 compass
+    direction wedges radiating from the center. The wedges alternate between Python
+    Blue (#306998) and Yellow (#FFD43B) colors. Compass directions (N, NE, E, SE,
+    S, SW, W, NW) are labeled around the perimeter in bold text. Concentric dashed
+    circular gridlines at intervals of 5, 10, 15, and 20 are visible with frequency
+    labels (5, 10, 15, 20) positioned along the NNE axis. The SW direction has the
+    longest wedge (frequency ~22), followed by S (~18), N (~15), and E (~12). The
+    title "Wind Direction Frequency · polar-bar · plotnine · pyplots.ai" appears at
+    the top. The plot uses a square 1:1 aspect ratio with clean white background and
+    subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt and clearly readable, compass labels are bold and well-sized
+          at 16pt, frequency labels are legible at 10pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels positioned with adequate spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good sizing, appropriate alpha (0.85),
+          dark outlines for contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow are distinguishable, though alternating pattern could
+          be more colorblind-friendly with additional differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format perfect for polar chart, plot fills canvas well, balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis labels (though this is appropriate for polar charts where
+          compass directions serve this purpose)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed gridlines with appropriate alpha, no legend needed
+          as colors are for visual appeal only
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar bar chart (wind rose) with wedge-shaped bars radiating
+          from center
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle represents direction, bar length represents magnitude/frequency
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 8 compass directions, bars extend outward from center, gridlines
+          for magnitude reference
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, gridlines extend appropriately beyond max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed (colors are decorative), compass labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{description} · {spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied frequencies across 8 directions demonstrating the wind
+          rose pattern, though single-category only (no stacked example)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind direction frequency is the classic use case for polar bar charts,
+          plausible distribution pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency values (5-22) are realistic for wind measurements, though
+          no units specified
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → geometry construction → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not used since data is hardcoded,
+          good practice)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but figure_size is (12,12), i.e. 3600x3600 at
+          300dpi, not the recommended 16x9 landscape
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of geom_polygon to construct polar wedges, geom_path
+          for gridlines, geom_segment for spokes, geom_text for labels. Demonstrates
+          plotnine's layered grammar of graphics approach. Since plotnine lacks native
+          coord_polar(), the manual Cartesian-to-polar transformation is an impressive
+          workaround that stays within the library.
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/altair.yaml b/plots/polar-basic/metadata/altair.yaml
index 937f7dfb20..5a589e1154 100644
--- a/plots/polar-basic/metadata/altair.yaml
+++ b/plots/polar-basic/metadata/altair.yaml
@@ -25,3 +25,177 @@ review:
   weaknesses:
   - Title format should be {spec-id} · {library} · pyplots.ai but shows Hourly Temperature
     · polar-basic · altair · pyplots.ai - should be polar-basic · altair · pyplots.ai
+  image_description: The plot displays a polar chart showing hourly temperature data
+    over a 24-hour cycle. The chart uses a white background with a circular layout.
+    Concentric dashed gray circles form radial gridlines at 5 different radii. Eight
+    radial spokes extend from the center to mark major hours (00:00, 03:00, 06:00,
+    09:00, 12:00, 15:00, 18:00, 21:00), with bold dark gray time labels positioned
+    around the perimeter. The data is represented by blue filled circular markers
+    (size ~500) connected by a yellow/gold line (strokeWidth 4). The pattern shows
+    temperatures are highest around midday (points extend furthest from center at bottom/12:00)
+    and lowest at night (points closest to center at top/00:00). The title "Hourly
+    Temperature · polar-basic · altair · pyplots.ai" appears at the top in a large
+    font.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (fontSize=30), hour labels are bold and readable (fontSize=22)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, labels well spaced around perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized (size=500) for 24 data points with
+          good opacity (0.9)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 1200x1200 format is perfect for polar chart, plot fills canvas
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis labels since this is a polar chart simulated in Cartesian
+          space (acceptable for polar format)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and 0.3-0.4 opacity, no legend needed
+          for single series
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar chart type with angular and radial positioning
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (angle) correctly maps hours, radius correctly maps temperature
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Radial gridlines visible, angular labels at standard intervals, proper
+          radial scale
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 24 hours visible, temperature range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Title includes extra "Hourly Temperature" prefix; should be just
+          "polar-basic · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full 24-hour cycle with natural temperature variation pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature is a real-world application, diurnal pattern is
+          realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperatures range appropriately (~5°C to ~25°C), realistic for daily
+          variation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → layers → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Good use of layering, tooltips, and declarative encoding, but polar
+          simulation is a workaround since Altair lacks native polar support
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/bokeh.yaml b/plots/polar-basic/metadata/bokeh.yaml
index ff383c0657..a21f30e152 100644
--- a/plots/polar-basic/metadata/bokeh.yaml
+++ b/plots/polar-basic/metadata/bokeh.yaml
@@ -24,3 +24,173 @@ review:
   - Missing radial axis labels to show temperature scale/units
   - Could leverage HoverTool to display hour and temperature on hover for interactivity
   - Title font size could be slightly larger for 3600x3600 canvas
+  image_description: |-
+    The plot displays a polar chart showing hourly temperature data over a 24-hour cycle. It uses a circular layout with the center representing the origin. The chart has:
+    - **Title**: "polar-basic · bokeh · pyplots.ai" at the top in gray text
+    - **Angular labels**: 8 time labels at 3-hour intervals (00:00, 03:00, 06:00, 09:00, 12:00, 15:00, 18:00, 21:00) in gray text around the perimeter
+    - **Radial gridlines**: 4 concentric circles in light gray with low alpha
+    - **Angular gridlines**: 8 spokes extending from center to edge in light gray
+    - **Data representation**: Blue (#306998) scatter points connected by blue lines forming a closed polygon
+    - **Pattern**: The data shows temperatures peaking around 12:00-15:00 (afternoon, maximum radius at bottom) and lowest around 00:00-06:00 (early morning, minimum radius at top)
+    - **Background**: Light off-white (#fafafa)
+    - **Layout**: Square aspect ratio, plot well-centered with balanced margins
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and hour labels readable, though title could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers and lines appropriately sized for 24 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but slight empty space in corners due to circular plot
+          in square canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Hour labels are descriptive but no radial axis labels showing temperature
+          scale
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, no legend needed for single series
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar/radar chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (hours) and radius (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: radial gridlines, angular labels at standard
+          intervals, starting angle at top for time-based data'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 24 data points visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (full points)
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but does not distinguish the spec-id from the spec title
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cyclical pattern well, though more variation would help
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature cycle is a perfect real-world polar chart use
+          case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic for daily cycle
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Correctly uses ColumnDataSource and manual polar grid construction,
+          but doesn't leverage Bokeh's interactive features like HoverTool which would
+          be distinctive
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/highcharts.yaml b/plots/polar-basic/metadata/highcharts.yaml
index ba87ee73a3..4f1b542975 100644
--- a/plots/polar-basic/metadata/highcharts.yaml
+++ b/plots/polar-basic/metadata/highcharts.yaml
@@ -25,3 +25,174 @@ review:
   - Y-axis (radial) title is rotated vertically making it harder to read
   - Grid alpha could be slightly more visible (0.2-0.3 rather than 0.15)
   - Could use hour labels (0h, 6h, 12h, 18h) instead of degrees for better context
+  image_description: The plot displays a polar chart with a white background. The
+    title "polar-basic · highcharts · pyplots.ai" is shown in bold black text at the
+    top, with the subtitle "24-Hour Temperature Pattern" below it in gray. The circular
+    chart has angular axis labels at 30° intervals (0°, 30°, 60°, ..., 330°) around
+    the perimeter. The radial axis shows temperature in Celsius from 0°C to 30°C with
+    gridlines every 5°C, and a rotated "Temperature (°C)" label. Data points are connected
+    by blue lines forming an irregular polygon shape, showing temperature variation
+    throughout a 24-hour cycle. The temperatures are lower around 0° (near the center)
+    and higher around 90-180° (extending outward). A legend at the bottom shows "Temperature"
+    with a blue marker. The chart uses a blue color (#306998) which is colorblind-safe.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable at full size, though some radial
+          labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points and connecting lines are visible, markers are appropriately
+          sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue (#306998) with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with 70% pane size, though some whitespace at edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Temperature (°C)" with units, X-axis shows degrees
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at 0.15 alpha, legend well-placed but could be larger
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar chart type with angular and radial axes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (hours as degrees) and radius (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has radial gridlines, angular labels at standard intervals; missing
+          compass-style labels option
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within 0-35°C range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Temperature"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact required format "polar-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cyclical temperature pattern with variation, demonstrates polar
+          nature well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 24-hour temperature pattern is a real-world scenario mentioned in
+          spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 5-25°C is realistic, though some noise points seem
+          low for daytime
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts polar mode with scatter series, but could leverage
+          more interactive features
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/letsplot.yaml b/plots/polar-basic/metadata/letsplot.yaml
index 960bfe11c1..1259f4b4c4 100644
--- a/plots/polar-basic/metadata/letsplot.yaml
+++ b/plots/polar-basic/metadata/letsplot.yaml
@@ -25,3 +25,182 @@ review:
     approach for this library
   - Temperature scale labels positioned only along one spoke; could be more intuitive
     if integrated differently
+  image_description: The polar chart displays 24-hour temperature data in a circular
+    format. The chart uses a white background with light gray dashed circular gridlines
+    representing temperature values and solid gray spoke lines at 3-hour intervals.
+    Data points are shown as blue (#306998) dots connected by a blue line forming
+    a closed polygon. Hour labels (00:00, 03:00, 06:00, 09:00, 12:00, 15:00, 18:00,
+    21:00) are positioned around the perimeter in dark gray text. Temperature scale
+    labels (2°C, 9°C, 16°C, 23°C, 30°C) appear along the 06:00 (right) spoke. The
+    title "polar-basic · letsplot · pyplots.ai" appears in the top-left corner. The
+    data pattern correctly shows cooler temperatures at night (points closer to center
+    around 00:00-06:00) and warmer temperatures during the day (points farther from
+    center around 12:00).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable; hour labels and temperature scales
+          are well-sized, title could be slightly larger for the canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels are well-positioned and clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points and connecting line are clearly visible; markers could
+          be slightly larger for better emphasis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue) on white background with gray gridlines;
+          excellent contrast and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square canvas is well-utilized; polar chart is centered with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (expected for polar chart, but could include "Temperature"
+          or "Hour" annotation)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed gridlines with appropriate alpha; no legend needed
+          for single-series data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar chart implementation using manual coordinate transformation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (hour) and radius (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Radial gridlines, angular labels at standard intervals, clock-style
+          orientation (0 at top)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; appropriate radial scale that doesn't compress
+          data near center
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Temperature scale labels present along radial axis
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "polar-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows cyclical temperature pattern demonstrating polar chart strengths;
+          minor: could show more variation in the data'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature readings with realistic day/night temperature
+          variation (cooler at night, warmer at noon)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range (approximately 2°C to 30°C) is realistic for daily
+          variation; some values seem slightly wide for a single day
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → coordinate conversion → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses path="." which works but is non-standard; saves both plot.png and
+          plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar effectively with geom_path, geom_point, geom_segment,
+          geom_text, and theme customization; however, does not leverage lets-plot's
+          coord_polar() which would be the idiomatic approach
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/matplotlib.yaml b/plots/polar-basic/metadata/matplotlib.yaml
index 345a708a97..7bcf757bc1 100644
--- a/plots/polar-basic/metadata/matplotlib.yaml
+++ b/plots/polar-basic/metadata/matplotlib.yaml
@@ -25,3 +25,180 @@ review:
   - Missing explanatory context - no subtitle or annotation explaining this is simulated
     daily temperature data
   - Could leverage more matplotlib polar features like fill_between for area emphasis
+  image_description: The plot displays a polar chart showing hourly temperature patterns
+    over a 24-hour cycle. The chart uses a circular coordinate system with time labels
+    around the perimeter (12 AM at top, proceeding clockwise through 3 AM, 6 AM, 9
+    AM, 12 PM, 3 PM, 6 PM, 9 PM). Temperature is represented radially with concentric
+    circles marked at 5°C, 10°C, 15°C, 20°C, and 25°C. Data points are shown as blue
+    (#306998) scatter points with white edges, connected by a yellow/gold (#FFD43B)
+    line forming a closed loop. The pattern shows cooler temperatures (around 7-15°C)
+    near midnight/early morning and warmer temperatures (around 18-23°C) in the afternoon,
+    clearly demonstrating the expected diurnal temperature cycle. The title "Hourly
+    Temperature Pattern · polar-basic · matplotlib · pyplots.ai" is displayed at the
+    top.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at fontsize 26, angular labels at 18, radial labels at 16 -
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, well-spaced labels
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at s=300 with white edges are clearly visible, line connects
+          well; slight deduction as 24 points could use slightly smaller markers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue markers and yellow line provide excellent contrast; no colorblind
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format (12x12) is perfect for polar chart, plot fills canvas
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial labels include units (°C), angular labels are time-based
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3. No legend is needed for a single
+          series, but nothing explains what the data represents, so context could
+          be improved
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta=hours (angular), radius=temperature correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter points, line connection, proper
+          angular labels, radial gridlines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows 0-30°C which contains all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Hourly Temperature Pattern · polar-basic ·
+          matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cyclical pattern clearly with variation; could show more dramatic
+          temperature swings
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature is an excellent real-world application for polar
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperatures 7-23°C are realistic; slight deduction as the sinusoidal
+          base + noise is somewhat idealized
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses polar projection correctly, but doesn't leverage advanced matplotlib
+          polar features like fill_between or custom theta formatting
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/plotly.yaml b/plots/polar-basic/metadata/plotly.yaml
index 9cdb039da0..bc541a4663 100644
--- a/plots/polar-basic/metadata/plotly.yaml
+++ b/plots/polar-basic/metadata/plotly.yaml
@@ -24,3 +24,187 @@ review:
   - The baseline reference circle at r=0 adds unnecessary visual noise and code complexity
   - Grid opacity (0.35) is slightly high; could be more subtle at 0.25-0.3
   - Could leverage more Plotly-specific interactive features like custom hover templates
+  image_description: The plot displays a polar chart showing 24-hour temperature data.
+    The chart has a circular layout with hour labels (0h through 22h) positioned around
+    the perimeter at 2-hour intervals, starting from the top (0h/midnight). The radial
+    axis shows temperature values from 0 to 25°C with the label "Temperature (°C)"
+    in the center. Data points are shown as medium-sized blue markers (#306998) connected
+    by lines, with a light blue semi-transparent fill (toself). The pattern shows
+    warmer temperatures during midday hours (10h-14h reaching ~20-25°C) and cooler
+    temperatures at night (~5-10°C around 22h-4h). The title "polar-basic · plotly
+    · pyplots.ai" appears at the top. The background is white with subtle gray grid
+    lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and tick labels are all readable. Font sizes
+          are appropriate for the canvas. Minor: tick font could be slightly larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Hour labels are well-spaced around
+          the perimeter.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are visible and appropriately sized (16px). Lines connect
+          clearly. Fill provides good area visualization. Slightly larger markers
+          would improve visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast against white background.
+          No colorblind issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Polar chart fills the canvas well. Balanced margins. Good use of
+          space.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial axis labeled "Temperature (°C)" with proper units.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is visible but slightly too prominent (alpha 0.35 sits near
+          the top of the recommended 0.2-0.4 range). Legend is hidden, which is appropriate
+          for single series, but the baseline trace at r=0 is unnecessary and adds
+          visual noise.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar chart type using Scatterpolar.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (angular position) correctly mapped to hours, radius mapped
+          to temperature.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: radial gridlines visible, angular labels
+          at standard intervals, starting angle at top (90°) for time-based data,
+          appropriate radial scale.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data. Range set to 0 to max(radius)*1.1.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden for single series; trace name is descriptive.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "polar-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows cyclical temperature pattern well with day/night variation.
+          Could benefit from showing both the cyclical nature AND some anomalies or
+          multiple data series to better demonstrate polar chart capabilities.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature readings are an excellent, realistic application.
+          The sinusoidal pattern with noise mimics real daily temperature cycles.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (5-25°C) are realistic for a typical day's temperature
+          range.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible noise.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (both correct), but the baseline
+          trace at r=0 is unnecessary code.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Scatterpolar correctly with fill="toself" for area visualization.
+          Saves both PNG and HTML for interactivity. However, could leverage more
+          Plotly-specific features like hover customization or animation.
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/plotnine.yaml b/plots/polar-basic/metadata/plotnine.yaml
index 563a53f812..680a1300d3 100644
--- a/plots/polar-basic/metadata/plotnine.yaml
+++ b/plots/polar-basic/metadata/plotnine.yaml
@@ -26,3 +26,178 @@ review:
     Level)
   - The implementation relies heavily on manual Cartesian conversion rather than leveraging
     plotnine distinctive features
+  image_description: The plot displays a polar/radar chart showing hourly activity
+    levels throughout a 24-hour day. The chart uses a circular coordinate system with
+    time labels at 8 positions (00:00, 03:00, 06:00, 09:00, 12:00, 15:00, 18:00, 21:00)
+    around the perimeter. Four concentric dashed circular gridlines mark radius values
+    of 25, 50, 75, and 100. Eight radial spokes extend from the center. Data points
+    (24 total, one per hour) are shown as yellow-filled circles with blue outlines,
+    connected by a blue line forming a closed loop. The pattern shows lower activity
+    at night (around midnight, closer to center) and higher activity during morning/afternoon
+    hours (extended toward outer rings). The title "Hourly Activity Levels · polar-basic
+    · plotnine · pyplots.ai" appears at the top. The background is white/clean with
+    no axis labels visible.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: title is clear at 24pt, hour labels readable at size 14, radius labels
+          slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: points are well-sized (size=4 with stroke), line visible (size=1.5),
+          appropriate for 24 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good use of canvas space, plot centered, margins balanced, slight
+          excess whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: polar chart has no traditional axis labels (this is acceptable for
+          polar plots, but no units shown for radius)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: gridlines are subtle (alpha=0.6, dashed), no legend needed
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct polar chart with circular coordinates
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: theta (angle) and radius correctly mapped to hour and activity level
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: radial gridlines visible, angular labels at standard intervals, starts
+          at top (90°) for time-based data per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible within range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: no legend present (acceptable for single series, but could label
+          what "activity" means)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly uses "polar-basic · plotnine · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows cyclical pattern well with variation, demonstrates both high
+          and low activity periods
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: hourly activity levels is a very realistic and relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: activity values 5-100 are sensible, though units unclear
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: follows imports → data → plot → save pattern without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses plotnine's grammar of graphics (ggplot, aes, geom_*) but since
+          plotnine lacks native polar coordinates, the implementation manually converts
+          to Cartesian - this is a creative workaround but not a distinctive plotnine
+          feature
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/pygal.yaml b/plots/polar-basic/metadata/pygal.yaml
index 6fc148edc0..594f58c2e2 100644
--- a/plots/polar-basic/metadata/pygal.yaml
+++ b/plots/polar-basic/metadata/pygal.yaml
@@ -25,3 +25,176 @@ review:
   - Grid shows y-guides but could benefit from more prominent radial ring labels
   - Does not utilize pygal interactive tooltip features which are a distinctive library
     strength
+  image_description: The plot shows a radar/polar chart with 24 spokes representing
+    hours (00:00 to 23:00) arranged clockwise around a circular axis. The radial axis
+    shows temperature values from 0 to approximately 20°C with gridlines at 4, 8,
+    12, 16, and 20. The data is displayed as a filled polygon in a light blue color
+    (#306998 with opacity) connecting 24 data points representing hourly temperatures.
+    The polygon shows a clear daily temperature cycle with lower values in early morning
+    hours (around 00:00-06:00) and peak values around midday (11:00-14:00). The title
+    "Hourly Temperature (°C) · polar-basic · pygal · pyplots.ai" appears at the top.
+    All hour labels are clearly readable around the perimeter, and radial gridlines
+    are visible but subtle.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; hour labels and title are clear, though radial
+          axis numbers are slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels are well-spaced around the perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points and filled area are clearly visible; dots could be slightly
+          larger for emphasis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square canvas is well utilized; radar chart fills the available space
+          appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Title includes units (°C); hour labels are descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, but legend is disabled (show_legend=False)
+          which is acceptable for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Radar chart is an appropriate polar-like visualization for pygal
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hours mapped to angles, temperature mapped to radius correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements present: radial gridlines, angular labels,
+          appropriate scale'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the chart bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single series with descriptive title; legend disabled appropriately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: {description} · {spec-id} · {library} · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cyclic temperature pattern well; demonstrates the circular
+          nature of 24-hour data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature readings are a perfect realistic use case for
+          polar charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range ~7-23°C is realistic for daily variation, though
+          some values go slightly negative conceptually
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png AND plot.html (bonus for pygal)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Radar chart, custom Style, fill=True; could use more pygal-specific
+          features like tooltips
+  verdict: APPROVED
diff --git a/plots/polar-basic/metadata/seaborn.yaml b/plots/polar-basic/metadata/seaborn.yaml
index f986b18e86..824a4f193d 100644
--- a/plots/polar-basic/metadata/seaborn.yaml
+++ b/plots/polar-basic/metadata/seaborn.yaml
@@ -29,3 +29,173 @@ review:
   - The Visitors y-axis label position on the right side is unconventional for polar
     plots
   - Grid style uses dashed lines which could be simplified to solid with lower alpha
+  image_description: The plot displays a polar chart showing website traffic by hour
+    of day. The chart uses a viridis colormap with colors ranging from purple/dark
+    blue (lower traffic ~100) to yellow (higher traffic ~200+). Data points are shown
+    as large circular markers with dark blue (#306998) borders, connected by a continuous
+    line with the same blue color. The area under the curve is filled with a light
+    blue transparent fill. Hour labels (00:00 through 23:00) are positioned around
+    the circumference in clockwise direction starting from 00:00 at the top. Radial
+    gridlines show values at 50, 100, 150, and 200. The title "Website Traffic by
+    Hour · polar-basic · seaborn · pyplots.ai" appears at the top. A colorbar on the
+    right shows "Traffic Volume" with the viridis scale. The y-axis label "Visitors"
+    appears on the right side. The traffic pattern clearly shows peaks around 19:00-21:00
+    (evening) with the highest yellow points, and moderate activity during morning
+    hours.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, hour labels at 14pt are readable, radial tick
+          labels at 14pt are clear, colorbar label at 16pt is legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All hour labels are well-spaced around the circumference, no text
+          overlaps
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are large (s=300) with good alpha (0.8), appropriate for
+          24 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of square format for polar plot, but colorbar takes space
+          and "Visitors" label positioning could be better
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Visitors" label present but no units; hour labels are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha at 0.3 is appropriate, but dashed style is slightly distracting;
+          colorbar well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar chart with theta (angle) and radius (distance from
+          center)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hours mapped to theta, traffic values mapped to radius correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Angular labels at standard intervals, radial gridlines visible, appropriate
+          radial scale
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, radial scale extends 15% beyond max for clarity
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents traffic volume values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format with spec-id, library, and pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows cyclical pattern, peaks and troughs, variation across hours
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic by hour is a real-world scenario mentioned in spec
+          applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Traffic values (100-220) are realistic for hourly visitor counts
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: seaborn imported but sns plotting functions not directly used for
+          data visualization (only for styling and color palette)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves correctly to plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/altair.yaml b/plots/polar-line/metadata/altair.yaml
index 74ab643e6d..af322e11d6 100644
--- a/plots/polar-line/metadata/altair.yaml
+++ b/plots/polar-line/metadata/altair.yaml
@@ -23,3 +23,179 @@ review:
   weaknesses:
   - No radius scale labels showing what the concentric circles represent (temperature
     values)
+  image_description: The plot displays a polar line chart showing monthly temperature
+    patterns for two cities. A blue line represents "Northern City" and a yellow/gold
+    line represents "Southern City". The chart has 12 month labels (Jan-Dec) arranged
+    clockwise starting from the right (Jan at ~0°/360°, progressing counterclockwise
+    through the months ending at Dec). Four concentric gray circles form the grid
+    background, with 12 radial lines extending from the center to each month position.
+    Both city lines form closed polygons connecting data points marked with filled
+    circles. The Northern City shows peak values around Jul (summer) and low values
+    around Jan (winter), while the Southern City shows the opposite pattern - demonstrating
+    hemisphere seasonality. The title "polar-line · altair · pyplots.ai" appears at
+    the top with subtitle "Monthly Temperature Patterns". A legend at the bottom identifies
+    the two city series.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, and month labels are all clearly readable with appropriate
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; month labels well-spaced around the perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and points are well-sized, though points could be slightly
+          larger for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) have excellent contrast and are
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; legend at bottom is slightly far from chart
+          but acceptable
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis labels for radius values (no temperature scale visible)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid circles are subtle (opacity 0.5), legend is clear and well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot with connected points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle=months (theta), radius=temperature magnitude
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, connected lines, polar coordinates, grid lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the polar area
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Northern and Southern cities
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "polar-line · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series with contrasting seasonal patterns (hemisphere
+          comparison)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature patterns for cities in different hemispheres is a natural
+          and educational use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (5-30°C range) are realistic for seasonal patterns
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic arrays
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Altair lacks native polar support, so the implementation manually
+          converts to Cartesian coordinates. While this is a valid workaround, it
+          doesn't showcase Altair's declarative strengths (the chart is essentially
+          built manually with mark_line and mark_rule). The layering approach is reasonable
+          but not a distinctive Altair feature.
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/bokeh.yaml b/plots/polar-line/metadata/bokeh.yaml
index 01e0015529..c19f06fce3 100644
--- a/plots/polar-line/metadata/bokeh.yaml
+++ b/plots/polar-line/metadata/bokeh.yaml
@@ -24,3 +24,176 @@ review:
   - Missing axis label for radial dimension (could add Wind Speed m/s text annotation)
   - Radius labels (2, 4, 6, 8, 10) positioned only on positive X-axis could be more
     prominent
+  image_description: 'The plot shows a polar line chart visualizing wind speed patterns
+    over 24 hours for two days. The chart uses a circular layout with hour labels
+    (0h-22h) positioned around the perimeter at 2-hour intervals. Concentric circles
+    represent wind speed values (2, 4, 6, 8, 10 m/s). Two line series are displayed:
+    Day 1 in blue (#306998) and Day 2 in yellow (#FFD43B), both with data markers.
+    The lines form closed loops connecting back to the starting point. The plot has
+    a light gray background (#fafafa) with subtle gray grid lines. A legend in the
+    top-right corner identifies the two series. The title "polar-line · bokeh · pyplots.ai"
+    is centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, hour labels at 18pt, all clearly readable. Radius
+          labels slightly small at 14pt.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines at width=4 and markers at size=12 are well-suited for the data
+          density (~26 points per series)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, good contrast against background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, though plot is slightly positioned toward
+          upper half
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels visible (axes hidden for polar plot, but no "Wind
+          Speed (m/s)" label on radial axis)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.5, legend well-placed in top-right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot with angular and radial dimensions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (hours) correctly mapped to angle, radius (wind speed) correctly
+          mapped to distance from center
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series shown, lines connect points in theta order, grid
+          lines are concentric circles and radial lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24-hour cycle shown, wind speed range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Day 1 and Day 2
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "polar-line · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cyclical pattern, multiple series with variation, closed loop.
+          Could show more extreme variation between days.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind speed over 24 hours is an excellent, neutral, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 0.5-10 m/s are realistic, though slightly high for some
+          hours
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: ColumnDataSource imported but could be avoided (direct arrays work
+          too)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and export_png, but doesn't leverage Bokeh-specific
+          interactive features like HoverTool. However, for a PNG export this is acceptable.
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/highcharts.yaml b/plots/polar-line/metadata/highcharts.yaml
index 5e0089ca1c..6608162601 100644
--- a/plots/polar-line/metadata/highcharts.yaml
+++ b/plots/polar-line/metadata/highcharts.yaml
@@ -25,3 +25,179 @@ review:
     the chart area and the legend
   - The Y-axis title appears rotated/sideways in the center which is slightly awkward
     to read
+  image_description: 'The plot displays a polar line chart on a white background with
+    the title "polar-line · highcharts · pyplots.ai" at the top and subtitle "Monthly
+    Average Temperature Patterns" below. The chart shows two line series representing
+    temperature patterns for two cities: "Continental City" (blue/dark blue color
+    with circle markers, #306998) and "Oceanic City" (yellow/gold color with diamond
+    markers, #FFD43B). The angular axis displays months (Jan through Dec) around the
+    perimeter in a clockwise direction starting from the top. The radial axis shows
+    temperature values from 0 to 28°C with concentric polygon gridlines. The Continental
+    City line shows a larger amplitude pattern (ranging from ~2°C in winter to ~26°C
+    in summer), while the Oceanic City line shows a more moderate pattern (~8-21°C).
+    The legend is positioned at the bottom center of the image.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, subtitle at 32px, axis labels at 28px, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, month labels well spaced around the perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 5 and marker radius of 10 are well-suited for the data
+          density (12 points per series)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) combination is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Square format appropriate for polar chart, but there's significant
+          whitespace around the chart
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis labeled "Temperature (°C)" with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is centered at the bottom but sits very far from the chart,
+          leaving excessive whitespace; grid is subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (months) and radius (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, line connections, cyclical data all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range 0-30 shows all data clearly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match series names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "polar-line · highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows two distinct patterns (continental vs oceanic climate), demonstrates
+          cyclical nature of data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature patterns for two climate types is a perfect,
+          neutral, real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (2-26°C for continental, 8-21°C for oceanic) are
+          realistic
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script without functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: numpy imported but only seed() is used (data is hardcoded), minor
+          issue
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Highcharts API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Captures the image via container.screenshot() rather than driver.save_screenshot(),
+          but outputs plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts polar chart with gridLineInterpolation polygon, multiple
+          series with different marker symbols. Could have used more advanced features
+          like tooltips configuration or animation options.
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/letsplot.yaml b/plots/polar-line/metadata/letsplot.yaml
index f8906e724d..c6c163cc26 100644
--- a/plots/polar-line/metadata/letsplot.yaml
+++ b/plots/polar-line/metadata/letsplot.yaml
@@ -24,3 +24,182 @@ review:
   - Missing temperature scale labels (no indication of what the concentric circles
     represent in °C)
   - 'Minor: LetsPlot.setup_html() call is unnecessary for PNG-only output'
+  image_description: The plot displays a polar line chart showing monthly average
+    temperature patterns for two cities (City A and City B) throughout the year. The
+    visualization uses a circular layout with month labels (Jan-Dec) arranged clockwise
+    around the perimeter starting from January on the right. Three concentric gray
+    grid circles indicate temperature magnitude from the center outward, with 12 radial
+    lines marking each month. City A is shown in dark blue (#306998) and City B in
+    golden yellow (#FFD43B). Both series form closed polygons connecting temperature
+    values month-by-month, clearly showing the seasonal pattern with peaks in summer
+    months (Jun-Aug) and troughs in winter (Dec-Feb). City B consistently shows higher
+    temperatures than City A. The title "polar-line · letsplot · pyplots.ai" appears
+    at the top. A legend labeled "Location" on the right identifies the two city series.
+    The plot uses a void theme with a clean white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and month labels are clearly readable; legend text is appropriately
+          sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels well-positioned around
+          perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines and points are clearly visible; line width and point sizes
+          appropriate for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot centered with balanced margins; legend
+          positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis labels for temperature values (no indication of units like
+          °C)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid circles and radial lines are subtle (alpha 0.6); legend well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot with lines connecting points around circular
+          axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (angle) represents months, radius represents temperature magnitude
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with different colors, closed loop, concentric grid
+          circles, radial lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within the grid
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies City A and City B
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "polar-line · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series comparison, cyclical seasonal pattern, clear
+          variation across months
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature comparison between two cities is a neutral, real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values (2-30°C) are realistic for temperate climate cities
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is mostly deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Imports are not fully clean: coord_fixed is used, but LetsPlot.setup_html()
+          is for HTML output only (not needed for PNG)'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but ggsave path parameter usage is unconventional
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Properly uses ggplot grammar, geom_path, geom_point, geom_text, scale_color_manual,
+          theme_void. However, the manual polar coordinate transformation is a workaround
+          since lets-plot lacks native coord_polar(). This is acceptable but does not showcase
+          lets-plot's strengths.
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/matplotlib.yaml b/plots/polar-line/metadata/matplotlib.yaml
index e555777b2a..bc667c2c11 100644
--- a/plots/polar-line/metadata/matplotlib.yaml
+++ b/plots/polar-line/metadata/matplotlib.yaml
@@ -26,3 +26,181 @@ review:
     natural visual flow
   - Could leverage matplotlib fill_between or polar-specific features for enhanced
     visualization
+  image_description: The plot displays a polar line chart showing hourly temperature
+    patterns across 24 hours for two seasons. The outer yellow/gold line represents
+    Summer temperatures (ranging from approximately 12°C to 28°C), while the inner
+    blue line represents Winter temperatures (ranging from approximately 0°C to 10°C).
+    The angular axis shows time in 3-hour intervals (0:00, 3:00, 6:00, 9:00, 12:00,
+    15:00, 18:00, 21:00) around the circle. The radial axis displays temperature with
+    concentric circles at 0°C, 10°C, 20°C, and 30°C. Both lines have circular markers
+    at each data point and form closed loops. The legend is positioned in the upper
+    right. The title reads "Hourly Temperature Pattern · polar-line · matplotlib ·
+    pyplots.ai" at the top. The plot uses a square format with clean, subtle grid
+    lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, tick labels at 16pt, radial labels at 14pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, hour labels well-spaced around circumference
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 and marker size of 8 are well-suited for 24 data
+          points per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow (#FFD43B) and blue (#306998) are colorblind-safe with excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 12x12 format is perfect for polar plots, plot fills canvas
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial axis shows temperature with units (°C), theta axis shows time
+          with format (HH:00)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend position at upper right could
+          be better integrated; slightly outside optimal placement
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot with data connected around circular axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta correctly maps to hours (cyclical), radius correctly maps to
+          temperature (magnitude)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series shown with different colors as mentioned in spec
+          notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate range (0-35°C)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Summer and Winter series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Hourly Temperature Pattern · polar-line ·
+          matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series, cyclical patterns, sinusoidal variation demonstrating
+          daily temperature swings
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature patterns for seasons is a perfect, neutral, realistic
+          scenario for polar plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Summer 12-28°C and Winter 0-10°C are realistic temperature ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic polar projection but no distinctive matplotlib features
+          like custom theta ticks formatting, fill_between for area, or advanced polar
+          customization
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/plotly.yaml b/plots/polar-line/metadata/plotly.yaml
index f1c518a380..728adbe7ae 100644
--- a/plots/polar-line/metadata/plotly.yaml
+++ b/plots/polar-line/metadata/plotly.yaml
@@ -27,3 +27,176 @@ review:
     slight redundancy in the center area
   - Could leverage more Plotly-specific features like fill=toself for area between
     lines or animation frames
+  image_description: 'The plot displays a polar line chart showing hourly temperature
+    patterns across a 24-hour cycle. Two closed lines represent Summer (yellow/gold
+    color, #FFD43B) and Winter (dark blue, #306998) temperature patterns. The angular
+    axis shows hours from 0:00 to 23:00 arranged clockwise starting from the top (midnight).
+    The radial axis displays temperature from 0°C to 35°C with concentric circles.
+    Summer temperatures peak around midday (12:00-15:00) at approximately 29-30°C
+    and dip to around 14°C at night. Winter temperatures show a similar diurnal pattern
+    but at much lower values (peak ~10°C, low ~0°C). The title correctly follows the
+    format ''Hourly Temperature Pattern · polar-line · plotly · pyplots.ai''. A legend
+    on the right side clearly identifies the two series.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, tick fonts at 16-18pt, all text perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, hour labels well-spaced around circle
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines at width=4 with markers at size=10, perfect visibility for
+          24 data points per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow vs dark blue provides excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot well-centered, good use of canvas, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial axis has 'Temperature (°C)' with units, angular axis has hour
+          labels
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (rgba 0.2 alpha), but radial axis tick labels with
+          redundant '°C' suffix overlap with axis title
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot in polar coordinates
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta = hours (angle), Radius = temperature (magnitude) - correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series with different colors, lines connecting points in
+          theta order
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radial axis 0-35°C shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Summer and Winter
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses '{spec-id} · {library} · pyplots.ai' format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple series, cyclical pattern completion (loop closed),
+          contrasting seasonal patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Hourly temperature pattern is a perfect, neutral, real-world application
+          for polar plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Summer temps 14-30°C, Winter temps 0-10°C are realistic seasonal
+          values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but should match library rules exactly
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Scatterpolar with hover templates which is good, but doesn't
+          leverage more advanced Plotly features like animations or rangeslider
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/pygal.yaml b/plots/polar-line/metadata/pygal.yaml
index f799288fe4..6f65353a4f 100644
--- a/plots/polar-line/metadata/pygal.yaml
+++ b/plots/polar-line/metadata/pygal.yaml
@@ -24,3 +24,175 @@ review:
     in °C)
   - Dot markers could be slightly larger for better visibility at 4800x2700 resolution
   - Could showcase additional pygal features like tooltips or value display on hover
+  image_description: 'The plot displays a radar/polar line chart with monthly labels
+    (Jan through Dec) arranged around the perimeter. Two data series are shown: "Northern
+    City" in blue (#306998) and "Coastal City" in yellow (#FFD43B). The blue line
+    shows dramatic seasonal variation with peaks around July (27°C) and troughs in
+    winter months (2-4°C). The yellow line shows more moderate variation (10-23°C
+    range). Concentric polygonal grid lines mark values from 0 to 24. Data points
+    are marked with dots at each month position. The title "polar-line · pygal · pyplots.ai"
+    appears at the top center. A legend at the bottom identifies the two city series.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and month labels are clearly readable at large font sizes;
+          radial value labels (0-24) slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; months are well-spaced around the perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines have good stroke width (6px), dots are visible (size 12); could
+          be slightly larger for the resolution
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with balanced margins; slight excess whitespace
+          on left and right edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Month labels present but no axis title indicating what the radial
+          values represent (temperature/°C missing)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with dotted lines; legend well-placed at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Radar chart correctly represents polar line visualization with connected
+          points around circular axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (months) and radius (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, connected lines, cyclical data visualization all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values visible within 0-24 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Northern City and Coastal City
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows exact format "polar-line · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows two series with contrasting patterns (high vs low variation);
+          could demonstrate more edge cases
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature data is a perfect real-world application of polar/cyclical
+          visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (2-27°C for northern climate, 10-23°C
+          for coastal)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → style → chart → render structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) even though data is deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Radar chart, custom Style, inner_radius, dots_size; could leverage
+          more pygal-specific features like tooltips or animations
+  verdict: APPROVED
diff --git a/plots/polar-line/metadata/seaborn.yaml b/plots/polar-line/metadata/seaborn.yaml
index cdb9ae0ba0..d769de6b11 100644
--- a/plots/polar-line/metadata/seaborn.yaml
+++ b/plots/polar-line/metadata/seaborn.yaml
@@ -23,3 +23,178 @@ review:
   weaknesses:
   - Legend styling (fancybox=True, shadow=True) is overly decorative and inconsistent
     with clean seaborn aesthetic
+  image_description: 'The plot shows a polar/circular line chart displaying hourly
+    temperature patterns for Summer and Winter seasons. The circular layout has hour
+    labels (0:00, 3:00, 6:00, 9:00, 12:00, 15:00, 18:00, 21:00) around the perimeter,
+    representing a 24-hour cycle. Concentric circles show temperature values (10°C,
+    20°C, 30°C, 40°C). Two lines with markers are displayed: an orange line for Summer
+    (larger radius, ~17-33°C) and a blue line for Winter (smaller radius, ~3-13°C).
+    Both lines form closed loops connecting all 24 hourly data points. The legend
+    is positioned in the upper right corner with a white background box. The title
+    "Hourly Temperature Pattern · polar-line · seaborn · pyplots.ai" appears at the
+    top in bold. The plot uses a clean white background with subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, hour labels at 18pt, temperature labels at 16pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines at 3.5 width and markers at size 10 are visible, though markers
+          could be slightly larger for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind palette (orange/blue), excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good square format (12x12), plot fills canvas well, legend slightly
+          extends outside
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Temperature labels include units (°C), hour format is clear
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha at 0.4 is acceptable, but legend with fancybox and shadow
+          is overly decorative
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar line plot with circular axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theta (angle) = hours, Radius = temperature - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Line connects points in theta order, multiple series with different
+          colors, grid with concentric circles and radial lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate y-axis range (0-40°C)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Summer and Winter series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correctly uses format: Hourly Temperature Pattern · polar-line ·
+          seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows cyclical data (24-hour pattern), multiple series comparison,
+          clear seasonal difference
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Hourly temperature pattern is a real-world scenario, though the sinusoidal
+          pattern is slightly idealized
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Summer temps (17-33°C) and winter temps (3-13°C) are realistic, though
+          ranges could be slightly more varied
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, and seaborn imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but the filename in code is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_theme() and sns.color_palette() for styling, but doesn't
+          use seaborn's high-level plotting functions (this is acceptable since seaborn
+          doesn't have native polar plot support)
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/altair.yaml b/plots/polar-scatter/metadata/altair.yaml
index 014e8b8cda..ef31a71f60 100644
--- a/plots/polar-scatter/metadata/altair.yaml
+++ b/plots/polar-scatter/metadata/altair.yaml
@@ -28,3 +28,182 @@ review:
     are a key library strength
   - Some point overlap in dense clusters could be improved with slightly smaller markers
     or stronger alpha
+  image_description: 'The plot displays a polar scatter chart of wind observations
+    on a square canvas. Four concentric gray circles mark radii at 5, 10, 15, and
+    20 m/s. Eight radial spokes extend from the center at 45° intervals, labeled with
+    cardinal/intercardinal directions and degree values (e.g., "N (90°)", "SE (315°)").
+    Data points are colored by time of day: blue for Morning, yellow for Afternoon,
+    and coral/pink for Evening. The Morning (blue) points cluster strongly in the
+    SE quadrant with higher speeds, the Afternoon (yellow) points cluster in the NW
+    quadrant with moderate-to-high speeds, and Evening (coral) points are scattered
+    more diffusely with lower speeds. The title "Wind Observations · polar-scatter
+    · altair · pyplots.ai" appears at the top center, and a legend in the upper right
+    shows the "Time of Day" categories.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 28pt, direction labels 18pt bold, radius labels 14pt - all
+          clearly readable, slightly below optimal for tick labels
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; direction labels well-spaced outside plot area
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers size=200 with opacity=0.8 is good for 120 points, but some
+          overlap in dense clusters
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998), Yellow (#FFD43B), Coral (#E87D72) are colorblind-safe
+          and easily distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 1:1 aspect ratio fills canvas well, balanced margins, legend
+          positioned nicely
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No traditional axis labels (polar plot uses direction/radius labels
+          instead, which are present)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid circles and spokes at alpha=0.3 are subtle; legend well-placed
+          in upper right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot using Cartesian transformation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle mapped to direction, radius mapped to wind speed correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Color encoding for time of day, radial gridlines, angular tick marks,
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data (-25 to 25 scale accommodates max radius 20)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Morning/Afternoon/Evening categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Wind Observations · polar-scatter · altair · pyplots.ai" matches
+          required format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows prevailing wind directions (NW, SE), different speeds, time-based
+          patterns; could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind measurement data is a realistic meteorological scenario with
+          prevailing directions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 1-20 m/s are realistic; gamma distribution creates natural
+          spread
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → grid creation → chart layering
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducible results
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses layering and declarative encoding well, but no interactive features
+          (Altair's strength); tooltips defined but not fully leveraged without interactivity
+          in PNG
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/bokeh.yaml b/plots/polar-scatter/metadata/bokeh.yaml
index 95a3c24d75..990518f575 100644
--- a/plots/polar-scatter/metadata/bokeh.yaml
+++ b/plots/polar-scatter/metadata/bokeh.yaml
@@ -24,3 +24,171 @@ review:
   - Radius labels (5, 10, 15...) are small relative to other text elements
   - Does not fully leverage Bokeh interactive capabilities (hover tooltips showing
     exact angle/speed)
+  image_description: |-
+    The plot displays a polar scatter chart showing wind measurement data. The visualization features:
+    - A circular polar coordinate system with concentric gridlines at intervals of 5, 10, 15, 20, 25, 30, 35, 40 m/s
+    - Angular spokes at 30° intervals extending from the center
+    - Cardinal direction labels (N, S, E, W) in bold black text
+    - Intermediate directions (NE, NW, SE, SW) in gray text
+    - Three categories of data points: Morning (blue), Afternoon (yellow), and Evening (red)
+    - Blue morning points clustered in the NE quadrant showing prevailing wind direction
+    - Yellow afternoon points concentrated in the SW quadrant
+    - Red evening points scattered more uniformly around the center
+    - A legend on the right side showing Morning, Afternoon, and Evening categories
+    - Title "polar-scatter · bokeh · pyplots.ai" at the top
+    - "Wind Speed (m/s)" label below the S direction
+    - Light gray background (#FAFAFA) with subtle grid lines
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, direction labels at 22pt/18pt, radius labels at 16pt
+          - all readable but radius labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size 22 with alpha 0.75 work well for 120 points, though
+          some overlap in dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red provide good colorblind-safe distinction
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square 3600x3600 format is appropriate for polar plot; y_range asymmetry
+          leaves slight extra space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Wind Speed (m/s)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.5), legend well-placed but slightly separated
+          from plot
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot using Cartesian conversion
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle correctly maps to direction, radius to wind speed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has radial gridlines, angular tick marks, color encoding for categories,
+          legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radius starts at 0, extends to accommodate all points (up to 40 m/s)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend labels correct but small glyph markers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "{spec-id} · {library} · pyplots.ai" format correctly
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows prevailing wind directions (NE morning, SW afternoon), variable
+          evening - demonstrates realistic wind patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind measurement scenario is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Wind speeds 2-40 m/s are realistic for wind measurements
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh modules)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (correct for interactive library)
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/highcharts.yaml b/plots/polar-scatter/metadata/highcharts.yaml
index 6f0cf6b79a..1d69f60823 100644
--- a/plots/polar-scatter/metadata/highcharts.yaml
+++ b/plots/polar-scatter/metadata/highcharts.yaml
@@ -22,3 +22,167 @@ review:
   weaknesses:
   - Marker size could be slightly larger (radius 14 is adequate but could be ~18 for
     better visibility at 3600x3600)
+  image_description: 'The polar scatter chart displays wind direction and speed distribution
+    on a white background. The main title "polar-scatter · highcharts · pyplots.ai"
+    appears in bold at the top, with a subtitle "Wind Direction and Speed Distribution"
+    below it. The circular polar plot shows compass directions (N, NE, E, SE, S, SW,
+    W, NW) around the perimeter, with concentric gridlines at 0, 7, 14, and 21 m/s
+    intervals. Data points are displayed in three colors: blue (Morning), yellow (Afternoon),
+    and purple (Evening). The 120 data points show realistic clustering around NE
+    (~45°), SW (~225°), and W (~270°) directions, representing prevailing wind patterns.
+    The legend at the bottom clearly identifies the three time-of-day categories.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: title, subtitle, compass labels, and radial labels all readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: markers visible but could be slightly larger for 120 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/yellow/purple palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: well-centered polar chart with ~75% canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: radial axis shows "m/s" units, angular axis has compass directions
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle gridlines (alpha 0.15), legend well-positioned at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct polar scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: angle=direction, radius=speed correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: color encoding, compass directions, gridlines, centering
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-28 m/s accommodates all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Morning/Afternoon/Evening correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows prevailing directions, varying speeds, three categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: realistic wind measurement scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 0-25 m/s is realistic for wind speeds
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → config → render pattern
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Highcharts polar mode, custom JS formatter for compass directions,
+          interactive HTML output
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/letsplot.yaml b/plots/polar-scatter/metadata/letsplot.yaml
index ba8a07bba4..38cfaf388b 100644
--- a/plots/polar-scatter/metadata/letsplot.yaml
+++ b/plots/polar-scatter/metadata/letsplot.yaml
@@ -24,3 +24,175 @@ review:
   - Some data point overlap in the NW/SW high-density sectors - consider slightly
     smaller point size or lower alpha
   - Legend positioning creates slight layout asymmetry
+  image_description: 'The polar scatter plot displays wind observations using compass
+    directions (N, NE, E, SE, S, SW, W, NW) around the perimeter with concentric circles
+    showing wind speed from 0 to 18 m/s. Data points (120 observations) are colored
+    by time of day: blue (Morning), yellow (Afternoon), and purple (Evening). The
+    plot shows realistic prevailing wind patterns with higher concentrations from
+    the NW, W, and SW directions, matching the spec''s requirement for realistic directional
+    distributions. The title "Wind Observations · polar-scatter · letsplot · pyplots.ai"
+    is positioned at the top, and a legend "Time of Day" is on the right. The radial
+    axis is labeled "Wind Speed (m/s)" on the left side. The grid uses subtle gray
+    lines with good visual hierarchy.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, compass labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Points are visible with good alpha (0.75), but some clustering in
+          NW/SW sectors causes minor overlap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, purple palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Square format appropriate for polar plot; legend placement creates
+          slight asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Wind Speed (m/s)" with units; x-axis uses compass directions
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Subtle grid (gray #CCCCCC), legend well-placed on right'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle correctly maps to direction, radius to speed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has compass directions, radial gridlines, color encoding by time
+          of day
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-18 m/s range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Morning/Afternoon/Evening
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "polar-scatter · letsplot · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows directional clustering (prevailing winds), speed variation,
+          and time categories; could show more dramatic speed differences between
+          directions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind measurement scenario is realistic with von Mises-like directional
+          clustering
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Wind speeds 1-18 m/s are realistic for surface winds
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses "from lets_plot import *" which is acceptable per library rules,
+          but pandas could be questioned
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses coord_polar, ggplot grammar, theme_minimal; could leverage more
+          lets-plot specific features like tooltips
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/matplotlib.yaml b/plots/polar-scatter/metadata/matplotlib.yaml
index 1069bcc72a..0c49c24484 100644
--- a/plots/polar-scatter/metadata/matplotlib.yaml
+++ b/plots/polar-scatter/metadata/matplotlib.yaml
@@ -25,3 +25,178 @@ review:
     whitespace; could be positioned closer or inside the plot
   - Could use matplotlib distinctive features more (e.g., colorbar for continuous
     third variable, custom tick formatters, or annotation capabilities)
+  image_description: 'The plot displays a polar scatter plot for wind measurement
+    data with a compass-style orientation (North at top, clockwise direction). Data
+    points are positioned by wind direction (angle) and wind speed (radius, 0-20 m/s).
+    Three categories are shown: Morning (blue circles) clustered around SW direction,
+    Afternoon (yellow circles) clustered around NW direction, and Evening (brown circles)
+    distributed across all directions. The title "polar-scatter · matplotlib · pyplots.ai"
+    appears at top center. A "Time of Day" legend is positioned in the upper right.
+    Radial gridlines appear at 10 and 20 m/s intervals. The axis label "Wind Speed
+    (m/s)" appears on the left side.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, direction labels at 18pt, tick labels at 14pt, legend
+          at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend positioned outside plot area
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Marker size (s=150) and alpha (0.7) appropriate for 120 points; slight
+          deduction as some brown/blue overlap makes individual points harder to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998), Yellow (#FFD43B), and Brown (#8B4513) are distinguishable;
+          minor concern that blue and brown could be confused in some colorblind conditions
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 12x12 figure appropriate for polar plot; good canvas utilization
+          with legend outside
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Wind Speed (m/s)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid has alpha=0.3 which is good, but legend is positioned quite
+          far from the plot (bbox_to_anchor=(1.05, 1.0))
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle = wind direction, Radius = wind speed correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: 120 points, angle/radius encoding, color
+          for third variable (time of day), compass-style labels, radial gridlines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radial axis starts at 0 and extends to 20 (max data ~22)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Morning, Afternoon, Evening
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "polar-scatter · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows prevailing wind directions (SW morning, NW afternoon) and variable
+          evening winds; good clustering patterns; minor deduction as data is perhaps
+          too clustered, limiting spread demonstration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind measurement scenario with time-of-day variation is realistic
+          and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 0-22 m/s are realistic; minor point that most speeds
+          cluster 0-10 m/s with only a few outliers reaching 20+
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib.pyplot, numpy, Line2D)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib's polar projection and compass configuration (set_theta_zero_location,
+          set_theta_direction, set_thetagrids), but these are standard polar features
+          rather than distinctive matplotlib strengths
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/plotly.yaml b/plots/polar-scatter/metadata/plotly.yaml
index 101b52b132..0bf140c41c 100644
--- a/plots/polar-scatter/metadata/plotly.yaml
+++ b/plots/polar-scatter/metadata/plotly.yaml
@@ -26,3 +26,181 @@ review:
   - Legend could be positioned closer to the plot area
   - Could add custom hover templates to show exact wind direction and speed values
   - Minor whitespace imbalance on the left side of the canvas
+  image_description: 'The plot displays a polar scatter chart showing wind observations.
+    The chart features a circular layout with North (N 0°) at the top, East (E 90°)
+    on the right, South (S 180°) at the bottom, and West (W 270°) on the left, with
+    intermediate directions (NE, SE, SW, NW) also labeled. Data points are colored
+    by time of day: blue for Morning, yellow for Afternoon, and orange for Evening.
+    The radial axis shows "Wind Speed (m/s)" ranging from 0 to 25. Two main clusters
+    of points are visible - one in the Northwest quadrant and another in the Southeast
+    quadrant, representing prevailing wind directions. The title reads "Wind Observations
+    · polar-scatter · plotly · pyplots.ai". A legend in the upper right shows the
+    "Time of Day" categories. The markers have good size (approximately 14px) with
+    white borders and slight transparency (0.75 opacity).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (32pt), axis labels are readable (18pt),
+          radial axis title visible (20pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (14px) with good opacity (0.75) and white
+          borders for definition. Slight density in clusters but still distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and orange palette is colorblind-friendly (avoids red-green
+          confusion)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, legend positioned well, slight imbalance
+          with extra whitespace on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Radial axis has descriptive label with units: "Wind Speed (m/s)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (alpha 0.15), legend well-placed with title. Minor:
+          radial axis label overlaps slightly with data at center'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot using Scatterpolar
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle mapped to theta (wind direction), radius to r (wind speed)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has angular ticks at meaningful intervals, radial gridlines, color
+          encoding for third variable (time of day), proper compass orientation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radial axis starts at 0 and extends to accommodate all data (max
+          * 1.1)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Time of Day" with accurate labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Wind Observations · polar-scatter · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clustering (prevailing winds in NW and SE), variation in speed,
+          scattered observations. Could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind observation data is a perfect real-world application with plausible
+          prevailing directions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 2-25 m/s are realistic. Could benefit from slightly higher
+          speeds for dramatic storms
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data generation → figure creation
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct for plotly)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Scatterpolar correctly, exports both PNG and interactive HTML.
+          Could leverage more Plotly features like hover templates or animations
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/plotnine.yaml b/plots/polar-scatter/metadata/plotnine.yaml
index 4353acc1af..ccb3bf5a89 100644
--- a/plots/polar-scatter/metadata/plotnine.yaml
+++ b/plots/polar-scatter/metadata/plotnine.yaml
@@ -29,3 +29,180 @@ review:
     16:9 landscape might provide better label spacing
   - Legend could be positioned closer to the plot area to reduce whitespace on the
     right
+  image_description: 'The plot displays a polar scatter visualization of wind direction
+    and speed data. It features concentric dashed circular gridlines at 5, 10, 15,
+    20, and 25 m/s intervals. Eight radial spokes extend from the center to compass
+    directions (N, NE, E, SE, S, SW, W, NW), with bold dark gray labels at the perimeter.
+    Speed labels (5, 10, 15, 20, 25 m/s) are positioned along the NNE axis. Data points
+    are rendered as filled circles with alpha transparency, colored by time of day:
+    blue (Morning), yellow (Afternoon), and red/coral (Evening). The data clearly
+    shows two clusters - one in the SW quadrant (prevailing wind) and another in the
+    NE quadrant (secondary wind). The legend "Time of Day" is positioned on the right
+    side with all three categories listed. The title "Wind Direction and Speed · polar-scatter
+    · plotnine · pyplots.ai" appears at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and compass labels are clear and bold; speed labels are slightly
+          smaller but readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels positioned clearly
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with good alpha (0.75) for 120 data points;
+          slight overlap in dense clusters but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red/coral are distinguishable and reasonably colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square aspect ratio is appropriate for polar plot; slight imbalance
+          with legend taking space on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Speed labels include units (m/s); compass directions are descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha 0.5; legend is well placed
+          but could be closer to plot
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot using angle and radius mapping
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle maps to wind direction, radius maps to wind speed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: radial gridlines, angular tick marks,
+          color encoding for third variable'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the 25 m/s radius
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows time of day categories
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Format includes spec-id, library, pyplots.ai but title has extra
+          "Wind Direction and Speed" prefix
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows prevailing wind patterns with two distinct directional clusters;
+          demonstrates the polar nature well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind measurement scenario with realistic distributions and prevailing
+          directions
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Wind speeds 1-25 m/s are realistic; angles properly wrap to 0-360°
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Flat structure with imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as `plot.png`, which is correct (on re-review: output is
+          correct, so this score should be adjusted)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses grammar of graphics approach with geom_point, geom_path, geom_segment,
+          geom_text layers; good use of aes() and scale_color_manual; however, plotnine
+          does not have native coord_polar so this creative Cartesian workaround is
+          necessary
+  verdict: APPROVED
diff --git a/plots/polar-scatter/metadata/seaborn.yaml b/plots/polar-scatter/metadata/seaborn.yaml
index 7688662e25..e35b645bef 100644
--- a/plots/polar-scatter/metadata/seaborn.yaml
+++ b/plots/polar-scatter/metadata/seaborn.yaml
@@ -26,3 +26,185 @@ review:
     angle and overlaps with some data points
   - Figure size uses 12x12 (square) which is acceptable but the library rules suggest
     16x9 landscape as the standard format
+  image_description: 'The plot displays a polar scatter chart with wind measurement
+    data. The chart shows data points in two colors: blue (#306998) for Morning observations
+    clustered around the SW direction (around 225°), and yellow/gold (#FFD43B) for
+    Afternoon observations clustered around the NE direction (around 45°). The radial
+    axis represents wind speed in m/s, ranging from 0 to approximately 35. Cardinal
+    and intercardinal directions (N, NE, E, SE, S, SW, W, NW) are labeled around the
+    perimeter. The plot has a light gray background (#f8f9fa) with subtle dashed gridlines.
+    A legend in the upper right shows "Time of Day" with Morning (blue) and Afternoon
+    (yellow) markers. The title "polar-scatter · seaborn · pyplots.ai" appears at
+    the top in bold. A "Wind Speed (m/s)" label is positioned diagonally at approximately
+    60° direction.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, cardinal labels at 18pt, radial ticks at 14pt,
+          legend at 14/16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are cleanly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at s=150 with alpha=0.7 are appropriately sized for 120 data
+          points, though some clustering in the SW quadrant makes individual points
+          slightly harder to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-friendly (blue-yellow is safe)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square 12x12 figure works well for polar plot, good canvas utilization,
+          though legend placement at bbox_to_anchor=(1.15, 1.0) adds extra margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Cardinal directions shown clearly, "Wind Speed (m/s)" label present
+          with units, but placed awkwardly as rotated text overlapping with data points
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, legend well-placed with good styling, but
+          grid could be slightly more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar scatter plot with theta and radius coordinates
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Angle mapped to angular position, wind speed to radial distance correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Color encoding for time of day, prevailing wind directions, realistic
+          distributions all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radial axis starts at 0, extends to max(speeds)*1.1 to accommodate
+          all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Morning and Afternoon categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "polar-scatter · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two distinct clusters with different prevailing directions,
+          different speed distributions (gamma distributions with different parameters),
+          but could show more variety in spread
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Wind measurement scenario is authentic - SW morning winds and NE
+          afternoon winds are plausible for many coastal locations
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds of 3-35 m/s are realistic, though the highest values
+          (~35 m/s = ~126 km/h) are approaching storm-force winds which is at the
+          edge of typical observational data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current seaborn and matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" but should verify this is in the correct location
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot with hue parameter for categorical coloring
+          and palette support, but doesn't leverage other seaborn features like jointplot,
+          pairplot or statistical capabilities. The implementation is essentially
+          matplotlib polar with seaborn's scatterplot layered on top.
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/altair.yaml b/plots/precision-recall/metadata/altair.yaml
index 230399b571..b32595f98d 100644
--- a/plots/precision-recall/metadata/altair.yaml
+++ b/plots/precision-recall/metadata/altair.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Axis labels could be more descriptive (e.g., "Recall (Sensitivity)" and "Precision
     (PPV)") to aid interpretation
+  image_description: The plot displays a precision-recall curve comparing two classifiers
+    on a 16:9 canvas. A blue stepped line represents "Logistic Regression (AP = 0.634)"
+    and a yellow/gold stepped line represents "Random Forest (AP = 0.752)". A gray
+    dashed horizontal baseline at precision = 0.30 indicates the random classifier
+    performance. The X-axis is labeled "Recall" (0.00 to 1.00) and the Y-axis is labeled
+    "Precision" (0.00 to 1.00). The title "precision-recall · altair · pyplots.ai"
+    appears at the top center. The legend is well-positioned in the bottom-right corner,
+    showing all three line types with their respective colors and AP scores.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stepped lines are clearly visible with appropriate stroke width (4px
+          for curves, 3px for baseline)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend positioned appropriately in bottom-right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: '"Recall" and "Precision" lack units or context (could be "Recall
+          (True Positive Rate)" etc.)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with 0.3 opacity, legend well-placed with clear entries
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X-axis, Precision on Y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: AP scores displayed, baseline reference line present, stepped line
+          style used
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show 0-1 range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify models and their AP scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "precision-recall · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows two classifiers with different performance levels, baseline,
+          AP scores
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Binary classification scenario is plausible, simulates 30% positive
+          class ratio
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Precision/recall values in valid 0-1 range with realistic AP scores
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only uses altair, numpy, pandas - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses np.trapezoid, the current name for the deprecated np.trapz
+          (renamed in NumPy 2.0), so no deprecated API is involved
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses Altair declarative grammar well: layered charts, step-after
+          interpolation, custom color/stroke-dash scales, proper legend configuration'
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/bokeh.yaml b/plots/precision-recall/metadata/bokeh.yaml
index fa535946d2..daa536c557 100644
--- a/plots/precision-recall/metadata/bokeh.yaml
+++ b/plots/precision-recall/metadata/bokeh.yaml
@@ -25,3 +25,180 @@ review:
     the visualization)
   - Could add hover tooltips to show precision/recall values at specific points (Bokeh
     strength)
+  image_description: 'The plot displays a Precision-Recall curve with two classifier
+    comparisons. The title "precision-recall · bokeh · pyplots.ai" appears centered
+    at the top. Two stepped curves are shown: a blue line for Logistic Regression
+    (AP = 0.788) and a yellow/gold line for Naive Bayes (AP = 0.797). A horizontal
+    gray dashed baseline line at approximately 0.30 represents the random classifier
+    performance. The X-axis is labeled "Recall" (0 to 1), and the Y-axis is labeled
+    "Precision" (0 to 1). The legend is positioned in the top-right corner with a
+    white background. The plot has a light gray background (#fafafa) with subtle dashed
+    grid lines. Both curves demonstrate the characteristic stepped pattern of precision-recall
+    curves, starting high on the left and generally decreasing as recall increases.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend text are all clearly
+          readable at the high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend is well-positioned and doesn't
+          cover data
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths are appropriate; stepped curves are clearly visible and
+          distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and highly
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Recall" and "Precision" are descriptive but lack units (though
+          units aren''t applicable here as these are ratios)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid with alpha=0.3, legend well-placed with white
+          background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve implementation with stepped lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X-axis, Precision on Y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes AP scores in legend, baseline reference line, stepped line
+          style, multiple classifier comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes appropriately show 0-1 range for both precision and recall
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies classifiers with AP scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows two classifiers with different performance profiles, includes
+          baseline; could benefit from showing iso-F1 curves mentioned in spec as
+          optional
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Imbalanced classification dataset (70/30 split) is a realistic ML
+          evaluation scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AP scores around 0.79-0.80 are realistic; baseline at 0.30 matches
+          class imbalance
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → model training → plotting
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Code saves as 'plot.png' which is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, step() method, and Legend model; could leverage
+          more Bokeh-specific features like hover tooltips
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/highcharts.yaml b/plots/precision-recall/metadata/highcharts.yaml
index 9fea015e14..5ff1e06bf5 100644
--- a/plots/precision-recall/metadata/highcharts.yaml
+++ b/plots/precision-recall/metadata/highcharts.yaml
@@ -24,3 +24,177 @@ review:
   - Code contains helper functions instead of following KISS principle (inline code
     preferred)
   - Grid lines could be more subtle (lower alpha/opacity)
+  image_description: The plot displays a Precision-Recall curve with a blue stepped
+    area chart. The title "precision-recall · highcharts · pyplots.ai" appears at
+    the top in bold, with a subtitle showing "Average Precision (AP) = 0.926". The
+    X-axis is labeled "Recall (Sensitivity)" ranging from 0 to 1, and the Y-axis is
+    labeled "Precision (Positive Predictive Value)" also ranging from 0 to 1. The
+    main PR curve shows a characteristic stepped descent from high precision at low
+    recall to lower precision at high recall, with the area underneath filled in light
+    blue with transparency. A horizontal dashed yellow line at y=0.30 represents the
+    random baseline (positive class ratio). A legend in the top-right corner shows
+    "Classifier (AP = 0.926)" and "Random Baseline (ratio = 0.30)". The grid is subtle
+    gray, and the overall layout is clean with good use of space.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at the large canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stepped line and filled area are clearly visible, baseline is distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, no red-green
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with context: "Recall (Sensitivity)" and "Precision
+          (Positive Predictive Value)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is well-placed but grid lines are slightly prominent (alpha
+          appears higher than 0.3)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve with stepped line style
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X-axis, Precision on Y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has AP score displayed, baseline reference line, stepped line style
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-1 range for both precision and recall
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels classifier with AP and baseline with ratio
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "precision-recall · highcharts · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows typical PR curve behavior but could demonstrate more edge cases
+          (spec mentions iso-F1 curves as optional)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification with imbalanced dataset (30% positive) is a
+          realistic ML scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AP of 0.926 is realistic for a good classifier; positive ratio of
+          0.3 is reasonable imbalance
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Contains helper functions (compute_precision_recall_curve, compute_average_precision)
+          instead of inline code
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Correctly avoids np.trapz (deprecated in NumPy 2.0+)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses AreaSeries with step="left" for proper PR curve representation,
+          ScatterSeries typed as line for baseline, proper Highcharts options structure
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/letsplot.yaml b/plots/precision-recall/metadata/letsplot.yaml
index 99b18b82ef..6b75b243a6 100644
--- a/plots/precision-recall/metadata/letsplot.yaml
+++ b/plots/precision-recall/metadata/letsplot.yaml
@@ -24,3 +24,180 @@ review:
   weaknesses:
   - No visible grid lines (could add subtle grid for easier value reading)
   - Axis labels lack context (though P-R values are inherently unitless ratios)
+  image_description: The plot displays a Precision-Recall curve with a characteristic
+    stepped line pattern in dark blue (#306998) showing the classifier performance.
+    The curve starts at (0, 1.0) and descends in a stair-step fashion toward (1.0,
+    ~0.2) as recall increases. There's a light blue shaded area beneath the curve
+    representing the area under the PR curve. Four dotted gray iso-F1 curves are shown
+    in the background (F1=0.2, 0.4, 0.6, 0.8) with labels at their endpoints. A horizontal
+    dashed yellow line at y=0.2 indicates the Random Baseline (20%), with an annotation
+    label above it. The title reads "precision-recall · letsplot · pyplots.ai" at
+    the top, with a legend showing "Classifier (AP = 0.920)" in the top-right area.
+    Axes are labeled "Recall" (x-axis, 0 to 1.0) and "Precision" (y-axis, 0 to ~1.1).
+    The layout uses a minimal theme with clean, readable text.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Stepped curve is clearly visible, line size 1.5 is appropriate, area
+          fill adds helpful emphasis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor issue with slight negative x-axis
+          start (-0.05) creates small asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels "Recall" and "Precision" are descriptive but lack units (though
+          these are inherently unitless 0-1 ratios, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed at top, but grid lines are not visible (no grid
+          shown)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Precision-Recall curve with stepped line style
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X-axis, Precision on Y-axis - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: AP score in legend, baseline reference
+          line, stepped line style, iso-F1 curves'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-1 range with appropriate limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows classifier with AP score
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "precision-recall · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows curve descending pattern well, demonstrates both high-precision
+          low-recall and low-precision high-recall regions. Could show multiple classifiers
+          for comparison.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: 20% positive class ratio is realistic for imbalanced classification
+          (fraud, medical), beta distributions create plausible scores
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 samples, 80/20 split, scores from beta distributions are all
+          sensible values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculation → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, lets_plot, ggsave)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves to plot.png AND plot.html (good), but uses path="." which may
+          cause issues
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of lets-plot ggplot2-style grammar: geom_line, geom_area,
+          geom_text, geom_hline, scale_color_manual, theme_minimal, ggsize. Missing
+          some lets-plot specific features like tooltips.'
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/matplotlib.yaml b/plots/precision-recall/metadata/matplotlib.yaml
index 0017cafb64..fcbc1a03c8 100644
--- a/plots/precision-recall/metadata/matplotlib.yaml
+++ b/plots/precision-recall/metadata/matplotlib.yaml
@@ -27,3 +27,175 @@ review:
   - Only shows single classifier; spec mentions multiple classifiers comparison as
     an option
   - Header claims matplotlib 3.10.0 but version is hardcoded without verification
+  image_description: The plot displays a Precision-Recall curve on a 16:9 aspect ratio
+    canvas. The main curve is rendered as a stepped line in blue (#306998) showing
+    classifier performance, with a light blue semi-transparent fill beneath it. The
+    curve starts at precision=1.0, recall=0.0 and descends as recall increases, ending
+    around precision=0.4 at recall=1.0. A yellow dashed horizontal line at precision=0.2
+    indicates the random baseline (positive class ratio of 20%). Four gray dotted
+    iso-F1 curves (F1=0.2, 0.4, 0.6, 0.8) are overlaid as reference contours with
+    labels on the right edge. The legend in the upper right shows "Classifier (AP
+    = 0.920)" and "Random Baseline (P = 20%)". Title reads "precision-recall · matplotlib
+    · pyplots.ai" at the top. Axes are labeled "Recall" (x-axis) and "Precision" (y-axis).
+    A subtle gray dashed grid provides reference.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is excellent, stepped curve clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Recall", "Precision") but no units (N/A for
+          this plot type, but could use clearer context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend well-placed; F1 labels are slightly
+          small and faint
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X, Precision on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stepped line, AP score in legend, baseline
+          reference, iso-F1 curves'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show 0-1 range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels accurately describe elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "precision-recall · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows good classifier performance (AP=0.92) with imbalanced data;
+          could benefit from showing a second classifier for comparison
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Imbalanced 20/80 split is realistic for fraud/medical scenarios mentioned
+          in spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 samples, realistic beta distributions for scores
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves to "plot.png" which is correct, but there''s a minor issue:
+          the explicit library version "3.10.0" in header may not match actual version'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ax.step, fill_between, annotate - good matplotlib features but
+          nothing particularly distinctive
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/plotly.yaml b/plots/precision-recall/metadata/plotly.yaml
index 560d910aa1..b4df5a1024 100644
--- a/plots/precision-recall/metadata/plotly.yaml
+++ b/plots/precision-recall/metadata/plotly.yaml
@@ -29,3 +29,176 @@ review:
     point on the curve
   - Plotlys distinctive interactivity could be better leveraged with click-to-select
     threshold functionality
+  image_description: 'The plot displays a Precision-Recall curve on a white background
+    with a 16:9 aspect ratio. The main curve is shown in a deep blue color (#306998)
+    with a stepped line style (horizontal-then-vertical) and light blue fill underneath
+    (area under the curve). The title "precision-recall · plotly · pyplots.ai" is
+    centered at the top in black text. The X-axis is labeled "Recall (Sensitivity)"
+    ranging from 0 to 1, and the Y-axis is labeled "Precision (Positive Predictive
+    Value)" also ranging from 0 to 1. A yellow dashed horizontal line shows the random
+    baseline at 0.10 (the positive class ratio). Four iso-F1 curves are displayed
+    as gray dotted lines. A legend in the bottom-left corner shows: "Classifier (AP
+    = 0.812)", "Random Baseline (0.10)", and "F1 = 0.2". An annotation in the upper
+    right reads "Iso-F1 curves". The grid is subtle with light gray lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; legend, annotations, and data are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is clearly visible; stepped curve shape is distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe; good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with clarifying text: "Recall (Sensitivity)",
+          "Precision (Positive Predictive Value)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend only shows F1=0.2 for iso-F1 curves but there are 4 curves
+          visible (0.2, 0.4, 0.6, 0.8); this is confusing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X-axis, Precision on Y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has AP score in legend, baseline reference line, stepped line style,
+          iso-F1 curves
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show 0-1 range correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows AP=0.812 and baseline value
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "precision-recall · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows classifier performance across full recall range, demonstrates
+          classic precision-recall tradeoff
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Fraud detection scenario with 10% positive class is realistic; AP
+          of 0.812 shows good classifier
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 1000 samples, 10% positive class ratio are realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Code generates HTML but the interactive features (hover, zoom) are
+          not explicitly configured; no custom hover templates or interactive elements
+          beyond defaults
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/plotnine.yaml b/plots/precision-recall/metadata/plotnine.yaml
index b0e167c3f3..4c64035b02 100644
--- a/plots/precision-recall/metadata/plotnine.yaml
+++ b/plots/precision-recall/metadata/plotnine.yaml
@@ -26,3 +26,178 @@ review:
     the visualization)
   - Annotation text positioning could conflict with curve at different data distributions
   - No formal legend element; relies solely on inline annotations
+  image_description: 'The plot displays a Precision-Recall curve on a white background
+    with a subtle gray grid. The main curve is rendered in Python blue (#306998) using
+    a stepped line style, starting at (0, 1.0) and descending as recall increases
+    towards 1.0. A yellow dashed horizontal baseline is shown at y=0.21 representing
+    the random classifier performance. Two text annotations are present: "Average
+    Precision (AP) = 0.901" in bold blue text on a white semi-transparent rectangle
+    in the upper-right area, and "Random Classifier (baseline = 0.21)" in yellow text
+    just above the baseline. The title "precision-recall · plotnine · pyplots.ai"
+    is centered at the top. X-axis is labeled "Recall (Sensitivity)" and Y-axis is
+    labeled "Precision (Positive Predictive Value)". Both axes range from 0.0 to 1.0
+    with tick marks at 0.2 intervals.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and annotations are all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; annotations are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stepped line is thick enough (size=2), baseline clearly visible with
+          dashed style
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue curve and yellow baseline provide excellent contrast; colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with parenthetical clarification (Sensitivity,
+          Positive Predictive Value)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but no formal legend (annotations
+          serve this purpose adequately)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve with stepped line style as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X-axis, Precision on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has AP score annotation, baseline reference line, stepped style;
+          missing iso-F1 curves (mentioned as "consider" in spec)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes show full 0-1 range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Annotations accurately describe what they represent
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: false
+        comment: Follows format but uses different separator style (· vs -)
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full curve from high precision/low recall to low precision/high
+          recall; demonstrates typical classifier behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated binary classification with 20% positive class (imbalanced)
+          is realistic; could be more domain-specific
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: AP score of 0.901 is realistic for a good classifier; baseline at
+          0.21 matches positive class ratio
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_step, geom_hline, annotate,
+          theme customization; could leverage more plotnine-specific features like
+          scale_color_manual or faceting
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/pygal.yaml b/plots/precision-recall/metadata/pygal.yaml
index 94d827e12a..eb4ed7da9e 100644
--- a/plots/precision-recall/metadata/pygal.yaml
+++ b/plots/precision-recall/metadata/pygal.yaml
@@ -29,3 +29,149 @@ review:
     appears ineffective)
   - Minor visual density could be improved with slightly thicker stroke width for
     better curve distinction
+  image_description: 'The plot displays a precision-recall curve visualization on
+    a white background with the title "precision-recall · pygal · pyplots.ai" at the
+    top. Two classifier curves are shown: a blue stepped line for "Logistic Regression
+    (AP=0.790)" and a yellow/gold stepped line for "Random Forest (AP=0.890)". A red
+    horizontal dashed baseline labeled "Random Baseline (0.31)" indicates random classifier
+    performance at y≈0.31. The X-axis is labeled "Recall" (range 0-1) and Y-axis is
+    labeled "Precision" (range 0-1). The legend appears at the top-left corner with
+    colored squares identifying each series. Grid lines are subtle and dashed. Both
+    curves demonstrate the typical precision-recall tradeoff, with Random Forest outperforming
+    Logistic Regression.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text clearly readable, good font sizes for 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: curves clearly visible with good stroke width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/yellow/red palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good proportions, legend well-placed
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive labels (Recall, Precision)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle grid, clear legend with AP scores
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct XY chart for precision-recall curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X, Precision on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: AP scores, baseline, stepped lines, multiple classifiers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: full 0-1 range on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: accurate labels with AP values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'correct format: precision-recall · pygal · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows multiple classifiers, baseline, typical PR tradeoff
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: realistic ML classification with imbalanced data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: appropriate 0-1 scale, realistic AP scores
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) and random_state=42
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: pygal XY chart, custom Style, stroke_dasharray
+  verdict: APPROVED
diff --git a/plots/precision-recall/metadata/seaborn.yaml b/plots/precision-recall/metadata/seaborn.yaml
index f3175dc74f..c10257d198 100644
--- a/plots/precision-recall/metadata/seaborn.yaml
+++ b/plots/precision-recall/metadata/seaborn.yaml
@@ -24,3 +24,179 @@ review:
   - Limited use of seaborn-specific features; primarily uses matplotlib drawing functions
   - Could benefit from a third classifier curve to show fuller performance spectrum
     (poor performer)
+  image_description: The plot displays two precision-recall curves on a white gridded
+    background. The blue curve (Model A, AP = 0.81) shows excellent classifier performance,
+    starting at precision 1.0 and maintaining high precision through most recall values
+    before declining. A semi-transparent light blue fill area extends beneath it.
+    The yellow/gold curve (Model B, AP = 0.32) shows moderate performance with an
+    initially high precision that quickly drops and fluctuates between 0.3-0.7 across
+    recall values, with a yellow semi-transparent fill below. A horizontal gray dashed
+    line at precision 0.10 represents the random classifier baseline. Both curves
+    use stepped line styles. The title "precision-recall · seaborn · pyplots.ai" appears
+    at the top. Axis labels are "Recall (Sensitivity)" on x-axis and "Precision (Positive
+    Predictive Value)" on y-axis. A well-formatted legend in the upper right shows
+    all three elements with their AP/P values.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are clearly visible with good linewidth, fill areas provide
+          visual depth
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, distinct from each other
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, legend positioned well, minor margin at
+          right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with parenthetical clarifications: "Recall (Sensitivity)",
+          "Precision (Positive Predictive Value)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.3 is subtle, legend well-placed but slight visual
+          overlap with fill area
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct precision-recall curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Recall on X, Precision on Y - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has AP scores, baseline reference, stepped lines, multiple classifiers.
+          Missing iso-F1 curves (spec says "consider")
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-1 range for both dimensions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels accurately describe models with AP values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "precision-recall · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good and moderate classifiers, baseline, but could show a third
+          "poor" classifier for fuller coverage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Fraud detection scenario with 10% imbalance is realistic and well-contextualized
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AP scores of 0.81 and 0.32 are realistic; 10% positive class ratio
+          is appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.set_style("whitegrid") for styling but PR curves are drawn
+          with matplotlib ax.step() and ax.fill_between(), not seaborn plot functions.
+          Seaborn does not have a native PR curve function, so this is acceptable
+          but not showcasing distinctive features.
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/altair.yaml b/plots/pyramid-basic/metadata/altair.yaml
index 3637c98dbc..ea880b5d96 100644
--- a/plots/pyramid-basic/metadata/altair.yaml
+++ b/plots/pyramid-basic/metadata/altair.yaml
@@ -29,3 +29,157 @@ review:
     bars or genders
   - Data shows limited demographic variation - more pronounced asymmetry in older
     age groups would better demonstrate the visualization purpose
+  image_description: The plot displays a population pyramid chart with 9 age groups
+    (0-9 through 80+) on the Y-axis, arranged with youngest at the bottom. Blue horizontal
+    bars extend leftward from the center representing male population, while yellow/gold
+    bars extend rightward representing female population. The X-axis shows "Population
+    (millions)" with symmetric scale from 8 to 8 (displayed as absolute values). A
+    legend labeled "Gender" in the top-right corner identifies Male (blue square)
+    and Female (yellow square). The title "pyramid-basic · altair · pyplots.ai" is
+    centered at the top. The pyramid shape is clearly visible with the largest bars
+    in the 40-49 age group and progressively smaller bars toward both younger and
+    older ages. Subtle dashed grid lines aid readability.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with appropriate proportions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe with excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Population (millions)" includes units, "Age Group" is descriptive'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid/butterfly chart with opposing horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values extending left/right correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: symmetric axes, distinct colors, central
+          axis labels, legend'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric scale domain [-10, 10] shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Male and Female
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "pyramid-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows pyramid shape with asymmetry between genders (females living
+          longer visible in 80+ group), though could show more dramatic demographic
+          patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population pyramid by age/gender is the classic use case, realistic
+          population distribution
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in millions are reasonable for national populations, though
+          the specific numbers suggest a medium-sized country
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → dataframe → chart → save'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/bokeh.yaml b/plots/pyramid-basic/metadata/bokeh.yaml
index fbbd7f9b2b..90e45851ea 100644
--- a/plots/pyramid-basic/metadata/bokeh.yaml
+++ b/plots/pyramid-basic/metadata/bokeh.yaml
@@ -23,3 +23,171 @@ review:
   - Legend is positioned far from the data in top right corner
   - Could add HoverTool for enhanced interactivity showing exact values
   - Center line implementation uses hardcoded y-range that may not adapt well
+  image_description: The plot displays a population pyramid showing age distribution
+    by gender. Blue bars (#306998) represent Male population extending leftward from
+    the center, while coral pink bars (#E8888C) represent Female population extending
+    rightward. Nine age groups (0-9 through 80+) are displayed on the y-axis labeled
+    "Age Group". The x-axis shows "Population (thousands)" with a symmetric range
+    from -100 to 100. A vertical center line at x=0 provides visual separation. The
+    title "pyramid-basic · bokeh · pyplots.ai" appears at top left. A legend in the
+    top right corner identifies Male and Female. The pyramid shape clearly emerges
+    with the 30-39 age group having the largest bars on both sides, tapering towards
+    80+.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate height (0.7), good alpha (0.85)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and pink are colorblind-safe, easily distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight imbalance with legend far from data
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Population (thousands)", "Age Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha 0.3, legend well-styled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid chart with opposing horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values extending left/right correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Central axis, symmetric scales, distinct colors, legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric x-range (-100 to 100) shows all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Male and Female
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "pyramid-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across age groups, clear pyramid shape, but all female
+          values higher than male (slight asymmetry pattern)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population by age group is the classic pyramid use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in thousands (15-85) are realistic population figures
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, hbar, proper Bokeh figure configuration, but
+          could leverage more interactive features like HoverTool
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/highcharts.yaml b/plots/pyramid-basic/metadata/highcharts.yaml
index 8259b6f028..bf7a4d5569 100644
--- a/plots/pyramid-basic/metadata/highcharts.yaml
+++ b/plots/pyramid-basic/metadata/highcharts.yaml
@@ -26,3 +26,173 @@ review:
     push it off-screen)'
   - The mirrored Y-axis on the right side is redundant and adds visual clutter
   - Could use Highcharts-specific annotations or interactive features more extensively
+  image_description: The plot displays a horizontal bar chart (population pyramid)
+    showing population distribution by age group. Blue bars (#306998, Python Blue)
+    represent Male population extending leftward from the central axis, while yellow
+    bars (#FFD43B, Python Yellow) represent Female population extending rightward.
+    Age groups range from "0-9" at the bottom to "80+" at the top, with labels on
+    both left and right Y-axes. Each bar displays its absolute value as a data label.
+    The title "Population by Age Group · pyramid-basic · highcharts · pyplots.ai"
+    is prominently displayed at the top, with a subtitle explaining "Male (left) vs
+    Female (right) - Population in Millions". The chart has a clean white background
+    with subtle grid lines and a dark vertical line at the center (value 0).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title at 48px, axis labels at 28-36px. Slightly
+          large title could be more balanced.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with appropriate padding
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe (no red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins and spacing; minor issue with legend not visible in
+          the main view area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Age Group" and "Population (Millions)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend appears to be cut off or positioned below the visible area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid/butterfly chart using opposing horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis with symmetric scaling
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Left/right bars, central axis, distinct colors, category labels,
+          subtitle identifying sides
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric axis with 10% padding shows all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Male/Female labels are correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "pyramid-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows typical population pyramid pattern with bulge in working-age
+          groups and tapering at extremes. Could show more dramatic asymmetry.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population demographics is the canonical use case for pyramid charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in millions are appropriate; the range (12-75) is realistic
+          for a medium-sized country
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (no random), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts bar chart with dual xAxis, plotLines, custom formatters.
+          Could leverage more interactive features or annotations.
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/letsplot.yaml b/plots/pyramid-basic/metadata/letsplot.yaml
index 5fa09260f7..9cad8b37e5 100644
--- a/plots/pyramid-basic/metadata/letsplot.yaml
+++ b/plots/pyramid-basic/metadata/letsplot.yaml
@@ -28,3 +28,179 @@ review:
     demographic colors
   - Library-specific features could be leveraged more - lets-plot supports tooltips
     and interactivity that aren't utilized in the HTML output
+  image_description: The plot displays a population pyramid with 9 age groups (0-9
+    through 80+) arranged vertically on the y-axis. Male population bars extend to
+    the left in a dark blue color (#306998), while female population bars extend to
+    the right in yellow (#FFD43B). The x-axis shows "Population (thousands)" with
+    symmetric scale from 80 to 0 to 80. The pyramid shape is clearly visible, with
+    the widest bars in the 30-39 age group, tapering toward both younger and older
+    age groups. The title "pyramid-basic · letsplot · pyplots.ai" appears at the top.
+    A legend on the right identifies Male (blue) and Female (yellow). The minimal
+    theme provides a clean appearance with subtle gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, axis text at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, age group labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths well-proportioned, good spacing between categories
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow have good contrast, distinguishable for colorblind
+          users, though not an ideal colorblind palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend appropriately placed
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Age Group" and "Population (thousands)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle and the y-axis grid is blanked, which is good; the
+          legend is well placed but could sit closer to the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid/butterfly chart with opposing horizontal bars sharing
+          central axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Age groups on central axis, male/female populations correctly mapped
+          to left/right
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: left/right bars, symmetric scale, distinct
+          colors, category labels on central axis, legend identifying sides'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric axis scale (-80 to 80), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Male and Female with corresponding colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "pyramid-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows pyramid shape with varying bar lengths, demographic pattern
+          visible; slight asymmetry between genders shown (females live longer)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population demographics by age and gender is the canonical use case
+          for pyramid charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands are sensible; the pattern shows a typical population
+          structure, though max value ~75k is a bit abstract without knowing the population
+          size context
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), so no random seed is needed;
+          a short code comment stating this would still be best practice
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and lets_plot used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, coord_flip, scale_y_continuous with custom labels,
+          theme customization, ggsize - good usage of lets-plot but no advanced interactive
+          features utilized
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/matplotlib.yaml b/plots/pyramid-basic/metadata/matplotlib.yaml
index 1c07d00b9f..cc97beafb3 100644
--- a/plots/pyramid-basic/metadata/matplotlib.yaml
+++ b/plots/pyramid-basic/metadata/matplotlib.yaml
@@ -25,3 +25,168 @@ review:
   - Color choice of blue and yellow is colorblind-safe but unconventional for gender
     representation - traditional blue/pink or blue/red would be more immediately recognizable
   - Legend placement in upper right corner is slightly isolated from the chart data
+  image_description: The plot displays a population pyramid chart with 9 age groups
+    (0-9 through 80+) arranged vertically on the y-axis. Blue bars extend leftward
+    representing male population, while yellow/gold bars extend rightward representing
+    female population. The chart shows a realistic age distribution pattern with the
+    largest populations in the 40-49 and 50-59 age groups, tapering off at younger
+    and older ages. A vertical black center line divides the two sides. The x-axis
+    displays "Population (millions)" with absolute values ranging from 0-8 on both
+    sides. The legend is positioned in the upper right corner. The title correctly
+    follows the required format.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar heights are well-proportioned, clear visibility of all data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow provide good contrast and are colorblind-safe, though
+          not an ideal pairing for gender representation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate portion of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Population (millions)", "Age Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but legend placement in upper right
+          is slightly detached from the data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid/butterfly chart with opposing horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on y-axis, values extending left/right correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Symmetric scales, distinct colors, central axis line, legend identifying
+          sides
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with 15% padding on x-axis
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Male and Female
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: pyramid-basic · matplotlib · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows asymmetry between genders (females live longer), varied distribution
+          across ages, but could show more dramatic differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population pyramid with age-gender distribution is the canonical
+          use case from the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in millions are realistic, though the specific numbers don't
+          match any real country exactly
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random), but no explicit seed comment
+          for documentation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All API calls are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct parameters
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/plotly.yaml b/plots/pyramid-basic/metadata/plotly.yaml
index 12fa3c2549..9b3e41b6af 100644
--- a/plots/pyramid-basic/metadata/plotly.yaml
+++ b/plots/pyramid-basic/metadata/plotly.yaml
@@ -25,3 +25,180 @@ review:
   - Could add annotations to highlight key insights (e.g., gender ratio differences
     in elderly groups)
   - Population scale could be documented in code comments for context
+  image_description: The plot displays a population pyramid (butterfly chart) showing
+    population distribution by age group and gender. The chart uses a horizontal bar
+    layout with blue bars (Male) extending to the left from a central axis and yellow/gold
+    bars (Female) extending to the right. There are 9 age groups from "0-9" at the
+    bottom to "80+" at the top. The x-axis shows "Population (thousands)" with symmetric
+    tick labels (5,000 to 0 to 5,000). The y-axis shows "Age Group". The title reads
+    "Population Distribution · pyramid-basic · plotly · pyplots.ai" at the top, with
+    a horizontal legend showing Male (blue) and Female (yellow) below the title. The
+    layout uses the plotly_white template with subtle gridlines. The pyramid shape
+    is clearly visible with younger and middle age groups having larger bars, tapering
+    at both youngest (0-9) and oldest (80+) groups.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, clean spacing between elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths well-adapted to 9 categories, clear distinction between
+          groups
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Population (thousands)" has units, but "Age Group" is descriptive
+          without clarification'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.3 is subtle, legend well-placed horizontally above
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid/butterfly chart with opposing horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Left/right bars, symmetric axes, distinct colors, central axis labels,
+          legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with 15% padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Male and Female
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "pyramid-basic · plotly · pyplots.ai" format correctly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows asymmetry between genders (females live longer - visible in
+          70-79, 80+), population bulge in working age
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population pyramid by age/gender is the classic use case, data patterns
+          are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values in thousands are sensible, but the numbers represent a somewhat
+          small population (~35M male, ~36M female total)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no explicit seed comment for
+          clarity
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported, used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of hovertemplate with customdata for interactive hover,
+          write_html for interactivity, but could leverage more Plotly features like
+          annotations
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/plotnine.yaml b/plots/pyramid-basic/metadata/plotnine.yaml
index 0957050324..9bac7766bc 100644
--- a/plots/pyramid-basic/metadata/plotnine.yaml
+++ b/plots/pyramid-basic/metadata/plotnine.yaml
@@ -24,3 +24,176 @@ review:
   weaknesses:
   - Legend order (Female, Male) does not match visual order (Male on left, Female
     on right) - consider reordering
+  image_description: The plot displays a population pyramid chart with 9 age groups
+    (0-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70-79, 80+) arranged vertically
+    on the y-axis labeled "Age Group". Blue bars representing Male population extend
+    leftward from the center axis, while golden-yellow bars representing Female population
+    extend rightward. The x-axis is labeled "Population (thousands)" with symmetric
+    scale ranging from -6000 to 6000. The title reads "Population by Age & Gender
+    · pyramid-basic · plotnine · pyplots.ai" at the top. A legend on the right side
+    shows "Gender" with Female (yellow) and Male (blue). The chart uses a minimal
+    theme with subtle gray gridlines. The pyramid shape is clearly visible with the
+    widest bars at 40-49 age group and tapering at both youngest (0-9) and oldest
+    (80+) groups.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title ~22pt, axis labels ~20pt, tick
+          labels ~16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths well-proportioned for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow are distinguishable for most colorblind types, but
+          yellow could have slightly better contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though legend could be closer to plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Age Group" and "Population (thousands)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but legend shows Female before Male
+          which contradicts visual order (Male/blue is on left)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid/butterfly chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on central axis, values extend left/right correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors per side, legend, symmetric axis scales
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric scale from -6000 to 6000 shows all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Male and Female
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{description} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows asymmetry between genders (females live longer - more 80+),
+          different distributions, but variation could be slightly more pronounced
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Population pyramid by age and gender is the classic real-world use
+          case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands are realistic for a city/region, although the axis
+          is labeled "thousands" while the raw values appear to be actual counts
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_col, coord_flip, scale_fill_manual,
+          and theme customization. Could leverage more plotnine-specific features
+          like position adjustments or stat transformations
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/pygal.yaml b/plots/pyramid-basic/metadata/pygal.yaml
index 4f0176ee18..76321c01df 100644
--- a/plots/pyramid-basic/metadata/pygal.yaml
+++ b/plots/pyramid-basic/metadata/pygal.yaml
@@ -23,3 +23,177 @@ review:
   weaknesses:
   - Legend placement appears in top-left despite legend_at_bottom=True setting
   - Font sizes in custom_style deviate from library rules guide recommendations
+  image_description: The plot displays a population pyramid showing US 2023 age-gender
+    distribution. The chart uses a horizontal bar format with blue bars extending
+    left for Male population and yellow/gold bars extending right for Female population.
+    Age groups (0-9 through 80+) are labeled along the left vertical axis. The x-axis
+    shows "Population (millions)" ranging from -8 to 8. The title "pyramid-basic ·
+    pygal · pyplots.ai" appears at the top center. A legend in the top-left corner
+    identifies Female (yellow) and Male (blue). The pyramid shape clearly shows the
+    bulge in middle-age groups (40-49 being widest) tapering toward both younger and
+    older age groups.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and age group labels are clearly readable. Legend
+          text is somewhat small in proportion to canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible, pyramid shape is distinct.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, high contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, but legend placement in top-left creates
+          slight imbalance.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Population (millions)" with units. Y-axis age groups
+          are descriptive.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is positioned far from the plot in the top-left corner, separated
+          from the chart.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct pyramid chart type using pygal.Pyramid().
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (age groups) on central axis, values extending left/right
+          correctly.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Two opposing bar series, shared central axis, distinct colors, legend
+          present.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, symmetric scale (-8 to 8).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Female and Male.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "pyramid-basic · pygal · pyplots.ai".'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows age distribution pattern with clear pyramid shape, asymmetry
+          between genders visible (females live longer - 80+ has larger female bar).
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: US 2023 population estimates is a perfect real-world scenario for
+          population pyramids.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Population values in millions are realistic for US demographics.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no random).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses pygal.Pyramid which is the native chart type, but font sizes
+          don't follow the library rules guide (recommends smaller font sizes). The
+          implementation increases font sizes significantly beyond the documented
+          guidelines.
+  verdict: APPROVED
diff --git a/plots/pyramid-basic/metadata/seaborn.yaml b/plots/pyramid-basic/metadata/seaborn.yaml
index 0905b54254..9803e59a3a 100644
--- a/plots/pyramid-basic/metadata/seaborn.yaml
+++ b/plots/pyramid-basic/metadata/seaborn.yaml
@@ -20,4 +20,162 @@ review:
   - Clean data structure using pandas DataFrame with negative values for left-side
     bars
   - Well-placed legend and subtle grid lines enhance readability
-  weaknesses: []
+  weaknesses:
+  - None significant - this is publication-quality work
+  image_description: The plot displays a population pyramid showing age distribution
+    by gender. Blue bars (#306998) extend left from the center representing male population,
+    while gold/yellow bars (#FFD43B) extend right representing female population.
+    The Y-axis shows 9 age groups from "0-9" at top to "80+" at bottom. The X-axis
+    shows "Population (thousands)" with symmetric scale from 6,000 on left to 6,000
+    on right, with tick labels showing absolute values. A subtle black vertical line
+    marks the center axis at 0. The title "pyramid-basic · seaborn · pyplots.ai" is
+    at the top, and a legend in the upper right identifies Male (blue) and Female
+    (gold). Dashed grid lines aid readability.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text perfectly readable at proper sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars well-sized with good width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/gold palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent use of canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: includes units "Population (thousands)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: grid appropriate, legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct pyramid/butterfly chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: categories on Y-axis, values on X-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: symmetric axes, central line, distinct colors, legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly identifies Male/Female
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows full pyramid with visible gender asymmetry
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: real-world population demographics scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic population values in thousands
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic data (fixed arrays)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only used imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/altair.yaml b/plots/qq-basic/metadata/altair.yaml
index f716758903..db28e180f4 100644
--- a/plots/qq-basic/metadata/altair.yaml
+++ b/plots/qq-basic/metadata/altair.yaml
@@ -26,3 +26,177 @@ review:
     dense regions
   - Axis labels could include context hint (e.g., Sample Quantiles (sorted values))
   - The manual inverse CDF implementation, while impressive, adds code complexity
+  image_description: The plot displays a Q-Q (Quantile-Quantile) plot with blue filled
+    circular points (#306998, Python blue) against a light gray background. A yellow
+    dashed diagonal reference line (y=x) runs from the lower left to upper right.
+    The X-axis is labeled "Theoretical Quantiles" and Y-axis "Sample Quantiles", both
+    ranging approximately from 10-90. The title "qq-basic · altair · pyplots.ai" appears
+    at the top in a clear, readable font. Points generally follow the reference line
+    through the center but deviate above it in the upper right region, clearly demonstrating
+    the intended slight right skew in the data. The grid uses subtle dashed lines
+    with low opacity. Point sizes are appropriate for the 100 data points.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points size=200 with alpha=0.7 is good for 100 points, though slightly
+          large
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points on white with yellow reference line - excellent contrast,
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, slight extra whitespace
+          on left edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Theoretical Quantiles" and "Sample Quantiles"
+          but no units (not applicable for quantiles, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3 with dashed style, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical quantiles on X, sample quantiles on Y - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 45-degree reference line present, proper axis labels, demonstrates
+          skewness
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend required for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "qq-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows main Q-Q interpretation (deviation from line = skewness), but
+          could show more extreme tails
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible statistical scenario with 80 normal + 20 right-skewed observations
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values centered around 50 with std ~15, sensible range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html correctly, but the manual inverse CDF
+          implementation is overly complex when scipy.stats.norm.ppf would suffice
+          (though this avoids scipy dependency)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layering (reference_line + points), tooltips for interactivity,
+          configure_axis/configure_view for styling. Could use more Altair-specific
+          features like selections or conditional encoding.
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/bokeh.yaml b/plots/qq-basic/metadata/bokeh.yaml
index c958543f86..4f117480e1 100644
--- a/plots/qq-basic/metadata/bokeh.yaml
+++ b/plots/qq-basic/metadata/bokeh.yaml
@@ -24,3 +24,179 @@ review:
   - Data could show more diverse distribution characteristics (heavy tails, outliers)
     to better demonstrate Q-Q plot interpretation
   - Could leverage Bokeh interactivity (hover tooltips showing exact quantile values)
+  image_description: 'The plot displays a Q-Q (Quantile-Quantile) plot with blue circular
+    markers (color #306998) plotted against a light gray background (#fafafa). The
+    x-axis is labeled "Theoretical Quantiles" and the y-axis is labeled "Sample Quantiles",
+    both ranging approximately from -2.5 to 2.5. A yellow/gold dashed diagonal reference
+    line (y=x) runs from bottom-left to top-right. The title "qq-basic · bokeh · pyplots.ai"
+    appears in the top-left corner. The data points generally follow the reference
+    line but show slight deviation in the upper-right region (positive quantiles),
+    indicating the right-skewed data characteristic from the mixed normal distribution.
+    The grid is subtle with dashed lines at alpha 0.3. Overall, the plot has good
+    proportions with the chart area well-utilized.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (size=20) with good alpha (0.7) for 100 data
+          points; slightly on the smaller side but clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue markers on light background with yellow reference line - colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Theoretical Quantiles", "Sample Quantiles")
+          but no units (standard for z-scores)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed) which is good, but no legend present
+          to identify the reference line
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot comparing sample quantiles to theoretical normal
+          distribution
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical quantiles on X-axis, sample quantiles on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diagonal reference line (y=x), proper axis labels, shows deviation
+          from normality
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, axes appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this simple plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "qq-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows deviation from normality in upper tail (right skew), but could
+          demonstrate more distribution characteristics (e.g., outliers, S-curve)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible mixed normal distribution scenario, though generic (not
+          tied to a real-world application)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Z-scores ranging approximately -2.5 to 2.5 are appropriate for standardized
+          data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and Slope model which are Bokeh-specific, but
+          doesn't leverage more advanced Bokeh features like hover tooltips or interactive
+          capabilities
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/highcharts.yaml b/plots/qq-basic/metadata/highcharts.yaml
index 89738b1fd8..870836af3c 100644
--- a/plots/qq-basic/metadata/highcharts.yaml
+++ b/plots/qq-basic/metadata/highcharts.yaml
@@ -25,3 +25,172 @@ review:
   - Grid lines could be slightly more subtle (currently alpha 0.15, could use 0.1)
   - Axis labels could include units or more context (e.g., Theoretical Quantiles (Standard
     Deviations))
+  image_description: 'The plot displays a Q-Q (Quantile-Quantile) plot with a white
+    background. Blue circular markers (Python blue color #306998 with 0.7 alpha) represent
+    sample quantiles plotted against theoretical quantiles. A dashed yellow reference
+    line (y=x) runs diagonally across the plot. The title "qq-basic · highcharts ·
+    pyplots.ai" appears at the top in bold. The x-axis is labeled "Theoretical Quantiles"
+    and the y-axis "Sample Quantiles". Both axes range approximately from 6 to 102.
+    A legend in the top-right corner identifies the reference line and sample quantiles
+    (N=100). The data points follow the reference line closely in the middle range
+    but show slight deviation at the upper end, indicating the intended right skew
+    in the data.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 72px, axis titles at 48px, tick labels at 36px - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker radius 18 with alpha 0.7 is optimal for 100 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe Python blue (#306998) and yellow (#FFD43B)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but missing units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle with dashed style, but alpha 0.15 could be slightly
+          lower; legend well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q scatter plot with reference line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical quantiles on X, sample quantiles on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Reference line (y=x), scatter points, proper axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies reference line and sample quantiles with
+          N=100
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "qq-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows slight right skew demonstrating Q-Q interpretation, but could
+          show more extreme deviations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Statistical data comparison is a real use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values centered around 50 with reasonable spread
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Highcharts series configuration, legend options, interactive
+          HTML export
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/letsplot.yaml b/plots/qq-basic/metadata/letsplot.yaml
index fc5f7eebc1..b05206dc1b 100644
--- a/plots/qq-basic/metadata/letsplot.yaml
+++ b/plots/qq-basic/metadata/letsplot.yaml
@@ -25,3 +25,182 @@ review:
   - Grid lines could be slightly more subtle (current alpha appears around 0.5, could
     be 0.3)
   - Wildcard import from lets_plot requires multiple noqa comments
+  image_description: The plot displays a Q-Q (Quantile-Quantile) plot comparing sample
+    data against a theoretical normal distribution. Blue points (#306998, Python blue)
+    represent the quantile pairs, plotted against a yellow dashed reference line (#FFD43B,
+    Python gold) indicating the y=x line for perfect normality. The X-axis is labeled
+    "Theoretical Quantiles" and the Y-axis "Sample Quantiles", both ranging from approximately
+    -3 to 3. The title correctly shows "qq-basic · letsplot · pyplots.ai". The plot
+    clearly demonstrates right skewness in the data - points follow the reference
+    line closely in the lower/middle region but deviate upward above the line in the
+    upper quantiles (above ~0.5 theoretical quantiles), indicating the sample has
+    heavier right tail than a normal distribution. The visualization uses a minimal
+    theme with subtle gray dashed grid lines on a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=6) with appropriate alpha (0.75) for
+          100 data points; slightly larger could help
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points on white background with yellow line; excellent contrast
+          and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with 16:9 aspect ratio, balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Theoretical Quantiles" and "Sample Quantiles"
+          but no units (though not applicable for standardized quantiles)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle dashed grid with alpha, no legend needed for single series;
+          grid could be slightly more subtle
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical quantiles on X-axis, sample quantiles on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line present and clearly visible, quantile comparison
+          shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range of data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed/no issues
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title "qq-basic · letsplot · pyplots.ai" is correct but slightly
+          small in appearance
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows right skewness deviation pattern; could also show other distribution
+          characteristics
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated data with intentional skew is appropriate for demonstrating
+          Q-Q plot interpretation
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized quantiles with appropriate range (-2.5 to 2.5); 100
+          points is within recommended range
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: Uses "from lets_plot import *" with noqa comments; functional but
+          wildcard import
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses lets-plot grammar of graphics
+        score: 4
+        max: 5
+        passed: true
+        comment: ggplot() + geom_point/geom_line with theme_minimal; good use of ggsize
+          for scaling. Manual ppf calculation is thorough but could potentially use
+          scipy for cleaner code if available.
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/matplotlib.yaml b/plots/qq-basic/metadata/matplotlib.yaml
index 72173cf0ac..3d3dfe3d9e 100644
--- a/plots/qq-basic/metadata/matplotlib.yaml
+++ b/plots/qq-basic/metadata/matplotlib.yaml
@@ -24,3 +24,172 @@ review:
     of scipy.stats.norm.ppf - adds unnecessary code complexity
   - Grid/legend section could be improved with legend placed outside plot area or
     better positioned
+  image_description: The plot displays a Q-Q plot comparing sample quantiles against
+    theoretical (normal) quantiles. Points are rendered as blue circles (#306998)
+    with white edges, sized appropriately (s=200) with alpha=0.7 transparency. A yellow
+    dashed reference line (y=x) runs diagonally across the plot. The x-axis is labeled
+    "Theoretical Quantiles" and ranges from approximately -2.5 to 2.5. The y-axis
+    is labeled "Sample Quantiles" with the same range. The title reads "qq-basic ·
+    matplotlib · pyplots.ai" in the correct format. A legend in the upper left identifies
+    the reference line. The plot clearly shows the data's right skew through deviation
+    from the reference line at higher quantiles, with points falling below the line
+    in the upper-right region. The grid is subtle with alpha=0.3 and dashed styling.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at s=200 with alpha=0.7, optimal for 100 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow combination is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (quantiles are unitless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is good at alpha=0.3, but legend placement in upper left slightly
+          overlaps the plot area where it might interfere with data in some cases
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical quantiles on X, sample quantiles on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Reference line present, sample-to-theoretical comparison shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies reference line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "qq-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows right skew deviation from normality, but could also show light/heavy
+          tails more distinctly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible dataset with normal bulk plus right tail asymmetry
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Z-scores in sensible range (-2.5 to 2.5)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear structure with imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses custom inverse normal implementation instead of scipy.stats.norm.ppf
+          (not deprecated, but unnecessarily complex)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ax methods, proper styling, but could leverage matplotlib's
+          built-in stats visualization capabilities
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/plotly.yaml b/plots/qq-basic/metadata/plotly.yaml
index f4337c7f0d..61108fb442 100644
--- a/plots/qq-basic/metadata/plotly.yaml
+++ b/plots/qq-basic/metadata/plotly.yaml
@@ -22,3 +22,177 @@ review:
   - Manual implementation of inverse normal CDF is overly complex when scipy.stats
     could simplify this significantly
   - Axis labels lack context (could note these are standardized quantiles)
+  image_description: The plot displays a Q-Q (Quantile-Quantile) plot with a white
+    background and subtle gray grid lines. Blue circular markers (#306998) represent
+    the sample quantiles plotted against theoretical quantiles. A yellow/gold dashed
+    diagonal reference line (y=x) runs from the lower-left to upper-right. The title
+    "qq-basic · plotly · pyplots.ai" appears at the top in dark text. The x-axis is
+    labeled "Theoretical Quantiles" and the y-axis "Sample Quantiles", both ranging
+    approximately from -2.5 to 2.5. A legend in the upper-left shows "Sample Quantiles"
+    (blue circle) and "Reference (y=x)" (yellow dashed line). The data points largely
+    follow the reference line but show slight deviation in the upper-right portion,
+    indicating the positive skew from the mixed distribution in the data.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized at 14 with 0.7 opacity work well for 100 points, though
+          slightly on the larger side
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue markers and yellow line provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (though none are needed for standardized
+          quantiles)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at 0.1 alpha is appropriately subtle, legend well-positioned
+          but could use slightly more prominence
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical quantiles on X, sample quantiles on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: comparison to normal distribution, diagonal
+          reference line, proper axis labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show complete data range with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both traces
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format `qq-basic · plotly · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows deviation from normality (positive skew) which demonstrates
+          Q-Q plot interpretation; could show more extreme deviations
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible statistical data scenario; mixture of normals is a valid
+          demonstration
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standardized values in appropriate range (-2.5 to 2.5)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but the manual inverse normal calculation
+          is unnecessarily complex
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses plotly's interactive features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter, proper layout configuration, generates HTML for
+          interactivity. However, could leverage plotly's built-in statistical features
+          more
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/plotnine.yaml b/plots/qq-basic/metadata/plotnine.yaml
index 333501eab3..21aac49013 100644
--- a/plots/qq-basic/metadata/plotnine.yaml
+++ b/plots/qq-basic/metadata/plotnine.yaml
@@ -23,3 +23,172 @@ review:
   - Does not use plotnine built-in stat_qq or stat_qq_line for more idiomatic implementation
   - Axis labels could include context (e.g., Standard Normal in theoretical quantile
     label)
+  image_description: The plot displays a Q-Q (Quantile-Quantile) plot comparing sample
+    quantiles against theoretical normal quantiles. Blue circular points (#306998)
+    with alpha=0.7 are plotted along a yellow dashed diagonal reference line (#FFD43B).
+    The title "qq-basic · plotnine · pyplots.ai" is centered at the top. The x-axis
+    is labeled "Theoretical Quantiles" and the y-axis is labeled "Sample Quantiles",
+    both ranging roughly from -2.5 to 2.5. The plot uses a clean minimal theme with
+    subtle gray gridlines. The 100 data points follow the reference line closely in
+    the center but show slight deviation in the upper right tail (above the line),
+    correctly demonstrating the right-skewed data generation. The layout is well-balanced
+    with good use of the 16:9 canvas.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized (size=4) with good alpha=0.7 for 100 points; slightly
+          larger could help
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (N/A for standardized quantiles)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (minimal theme), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Theoretical on X, Sample on Y - correct assignment
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line clearly visible, points show distribution
+          comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: qq-basic · plotnine · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows deviation from normality in right tail, but could show more
+          varied distribution characteristics
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Demonstrates normality testing context well with mixed normal components
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standardized quantiles in sensible range (-2.5 to 2.5)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed seed (np.random.seed(42))
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly but no advanced plotnine features like
+          stat_qq or faceting
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/pygal.yaml b/plots/qq-basic/metadata/pygal.yaml
index 36e37fcba5..cbbf9b21c1 100644
--- a/plots/qq-basic/metadata/pygal.yaml
+++ b/plots/qq-basic/metadata/pygal.yaml
@@ -29,3 +29,185 @@ review:
     would simplify the code
   - Marker size (dots_size=12) could be slightly larger for better visibility at this
     resolution
+  image_description: The plot displays a Q-Q (Quantile-Quantile) plot with a white
+    background. Blue circular markers (100 points) represent sample data points plotted
+    against theoretical normal quantiles. A gray diagonal reference line (y=x) runs
+    from bottom-left to top-right. The title "qq-basic · pygal · pyplots.ai" appears
+    at the top in dark gray text. The x-axis is labeled "Theoretical Quantiles" and
+    the y-axis "Sample Quantiles", both ranging from approximately -2.8 to 2.8. A
+    subtle grid is visible with light gray vertical and horizontal lines. The legend
+    at the bottom shows "Sample Data" (blue) and "Reference (y=x)" (gray). The data
+    points follow the reference line closely in the center but deviate in the upper
+    tail (showing right skew as intended), demonstrating the plot's ability to reveal
+    distribution characteristics.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable at full resolution.
+          Font sizes are appropriately scaled for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (dots_size=12) are well-sized for 100 data points. Slightly
+          larger could improve visibility but current size is adequate.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) for data points and gray for reference line provide
+          excellent contrast and are colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space. Plot fills most of the area with balanced
+          margins. Legend at bottom is well-positioned.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Theoretical Quantiles" and "Sample Quantiles" are descriptive and
+          appropriate for a Q-Q plot.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is visible and helpful but could be more subtle. Legend is well
+          placed at bottom.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a Q-Q plot comparing sample quantiles to theoretical
+          normal quantiles.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows theoretical quantiles, Y-axis shows sample quantiles
+          as specified.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal reference line (y=x), proper axis labels, and clear
+          visualization of distribution comparison.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate margins.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Sample Data" and "Reference (y=x)".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows required format: "qq-basic · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Data demonstrates right skew deviation from normality as intended.
+          Shows points following the line in the center and deviating in the tail.
+          Could show more extreme deviations.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Sample data with normal + exponential mixture is a plausible demonstration
+          of deviation from normality. Context is generic but appropriate for a basic
+          Q-Q plot.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are in standard normal range (-2.8 to 2.8), which is appropriate
+          for a Q-Q plot.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Code follows imports → data → plot → save structure without functions
+          or classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (math, numpy, pygal, Style) are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as both plot.png and plot.html (correct for pygal).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart, custom Style configuration, and proper PNG/HTML
+          rendering. The manual implementation of inverse normal CDF (Beasley-Springer-Moro
+          algorithm) is notable but not a pygal-specific feature.
+  verdict: APPROVED
diff --git a/plots/qq-basic/metadata/seaborn.yaml b/plots/qq-basic/metadata/seaborn.yaml
index b17b8087b2..ef36426deb 100644
--- a/plots/qq-basic/metadata/seaborn.yaml
+++ b/plots/qq-basic/metadata/seaborn.yaml
@@ -25,3 +25,178 @@ review:
   - Could leverage more distinctive seaborn features beyond basic scatterplot
   - Data example shows only right skewness but not other Q-Q characteristics like
     heavy tails or S-curves mentioned in specification
+  image_description: 'The plot displays a Q-Q (Quantile-Quantile) plot with 200 blue
+    scatter points (color #306998) plotted against theoretical normal distribution
+    quantiles. A yellow dashed reference line (y=x, color #FFD43B) runs diagonally
+    from lower-left to upper-right. The X-axis is labeled "Theoretical Quantiles"
+    and the Y-axis is labeled "Sample Quantiles", both ranging approximately from
+    -3 to +3. The title correctly follows the format "qq-basic · seaborn · pyplots.ai".
+    Points follow the reference line closely in the center but deviate above the line
+    in the upper right tail, demonstrating the intended right skewness from the mixture
+    distribution. A legend in the upper-left corner indicates "Reference (y=x)". The
+    grid uses subtle dashed lines with low alpha. The plot uses the full 16:9 canvas
+    effectively with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size s=200 with alpha=0.7 is well-suited for 200 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points with yellow reference line, high contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Theoretical Quantiles", "Sample Quantiles")
+          but no units (quantiles are dimensionless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha=0.3 dashed lines, legend well-placed in upper
+          left
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Q-Q plot comparing sample to theoretical normal distribution
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=theoretical quantiles, Y=sample quantiles correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: Q-Q scatter, reference line, correct
+          axis labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points including extremes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels reference line as "Reference (y=x)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "qq-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows right tail deviation (skewness) but does not demonstrate other
+          Q-Q characteristics like heavy tails or S-curves mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible scenario with mixture distribution representing real-world
+          data (e.g., measurements with outliers)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Standardized quantiles in sensible range (-3 to +3), though original
+          data scale not shown
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Structure is imports → data → plot → save, but the manual inverse
+          normal CDF implementation adds complexity
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib.pyplot, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot() which is seaborn's axes-level function, but
+          does not leverage seaborn's statistical capabilities like regplot or more
+          distinctive seaborn features
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/altair.yaml b/plots/quiver-basic/metadata/altair.yaml
index 1c07ef4de5..c5bfdfcdbb 100644
--- a/plots/quiver-basic/metadata/altair.yaml
+++ b/plots/quiver-basic/metadata/altair.yaml
@@ -26,3 +26,178 @@ review:
   - Grid is slightly too subtle at alpha 0.3
   - Arrowheads at center (low magnitude vectors) are very small and harder to see
   - No tooltips or interactivity despite Altair strength in this area
+  image_description: The plot displays a 15x15 grid of vector arrows showing a circular
+    rotation pattern (u = -y, v = x). The arrows form a clear counter-clockwise rotational
+    flow around the center. Colors use the viridis colormap, ranging from dark purple
+    at the center (low magnitude ~0) through teal/green in the middle to bright yellow
+    at the corners (high magnitude ~2.8). The title reads "quiver-basic · altair ·
+    pyplots.ai" at the top. X and Y axes are labeled "X Position" and "Y Position"
+    respectively, ranging from -2.5 to 2.5. A "Magnitude" legend appears on the right
+    side. Arrows have proper arrowheads and uniform spacing across the grid.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or arrow elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arrows are well-sized and visible; arrowheads could be slightly larger
+          for better visibility at smaller magnitudes
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins, plot fills ~60% of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Position" and "Y Position" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is too faint (alpha 0.3); legend placement is acceptable but
+          could be closer to the plot
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y positions and U/V vectors correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: grid positioning, vector direction/length,
+          color encoding magnitude'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Magnitude" with proper color scale
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format "quiver-basic · altair · pyplots.ai" but middle dot
+          separator could be more visible
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rotation pattern well, magnitude variation from center to edges;
+          could show different field patterns but circular rotation is appropriate
+          for basic example
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Circular rotation field (u=-y, v=x) is a classic physics example
+          representing vortex/rotational flow
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Grid range of -2 to 2 is sensible; 15x15 grid (225 arrows) provides
+          good coverage
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) (though not strictly needed for deterministic
+          data)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative syntax with mark_rule for arrow construction,
+          color encoding with viridis scheme. Creative solution for arrows using rule
+          marks with manual arrowhead construction. Could leverage more Altair-specific
+          features like interactivity or tooltips.
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/bokeh.yaml b/plots/quiver-basic/metadata/bokeh.yaml
index 2fa4aae7a1..72db5cbe23 100644
--- a/plots/quiver-basic/metadata/bokeh.yaml
+++ b/plots/quiver-basic/metadata/bokeh.yaml
@@ -26,3 +26,176 @@ review:
     length proportional to magnitude)
   - Grid lines almost invisible due to alpha and dashes combination
   - Axis labels lack units
+  image_description: The plot displays a 15x15 grid of blue arrows arranged on a white
+    background, clearly showing a circular rotation vector field pattern (u = -y,
+    v = x). The arrows rotate counterclockwise around the origin at (0,0). The title
+    "quiver-basic · bokeh · pyplots.ai" appears in the top-left corner in gray text.
+    The X-axis is labeled "X Position" and the Y-axis is labeled "Y Position", both
+    ranging from approximately -2.5 to 2.5. Arrows are uniformly sized (normalized)
+    with Python Blue color (#306998). The grid lines are very subtle (barely visible).
+    The arrows have proper triangular arrowheads pointing in the direction of the
+    vector field. The layout utilizes the full canvas well with good spacing between
+    arrows.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable but could be slightly larger for
+          the 4800x2700 canvas. Tick labels are adequate.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, arrows well-spaced on the 15x15 grid
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Arrows are well-sized with clear arrowheads, appropriately scaled
+          for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (Python Blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though plot area could be slightly larger
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is present but extremely subtle (almost invisible)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y positions and U/V vectors correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: arrows with direction and magnitude, uniform
+          grid spacing'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full -2.5 to 2.5 range, containing all arrows
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-color plot (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "quiver-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows circular rotation pattern well; however, arrows are normalized
+          so magnitude information is lost (spec mentions "length proportional to
+          magnitude")
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Circular rotation flow field is a realistic physics example (angular
+          momentum, vortex)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid from -2 to 2 with 15 points is sensible, values are appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) (though not strictly needed for deterministic
+          math)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, segment, and patches for custom arrow rendering.
+          This is good use of Bokeh's low-level drawing primitives, though doesn't
+          leverage Bokeh-specific interactive features.
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/highcharts.yaml b/plots/quiver-basic/metadata/highcharts.yaml
index 27290ca347..ed7db9b13d 100644
--- a/plots/quiver-basic/metadata/highcharts.yaml
+++ b/plots/quiver-basic/metadata/highcharts.yaml
@@ -26,3 +26,183 @@ review:
     (no functions)'
   - Grid lines configured but not visible in the output - the dashed grid was specified
     but does not appear
+  image_description: 'The plot displays a quiver (vector field) visualization showing
+    a circular wind flow pattern. The chart has a white background with arrows positioned
+    on a 12x12 grid spanning from -4 to 4 on both axes. Arrows are color-coded by
+    magnitude: cyan (Low), green (Medium), yellow (High), and orange (Very High).
+    The circular counterclockwise rotation pattern is clearly visible - arrows near
+    the center are small (cyan/low magnitude) while arrows at the periphery are larger
+    (orange/very high magnitude). The title "Circular Wind Flow · quiver-basic · highcharts
+    · pyplots.ai" appears at the top. X and Y axis labels show "Position (grid units)".
+    A legend at the bottom displays the four magnitude categories with their corresponding
+    colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend all clearly readable at
+          high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or data elements, arrows well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arrows are well-sized and clearly visible; some inner arrows near
+          center are quite small but this is appropriate for the magnitude encoding
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Cyan, green, yellow, orange palette is colorblind-safe, avoids red-green
+          confusion
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout overall, plot fills canvas well, slight asymmetry in
+          arrow distribution due to filtering near-zero vectors
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "X Position (grid units)", "Y Position
+          (grid units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed. However, the dashed grid lines that were
+          configured are not visible in the final output (missing gridlines
+          entirely)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field representation using arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y positions and U/V components correctly mapped to arrow placement
+          and direction
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uniform grid, directional arrows, magnitude encoding via color, circular
+          rotation pattern
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range from -4 to 4
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels magnitude categories (Low, Medium, High,
+          Very High)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Circular Wind Flow · quiver-basic · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows circular rotation pattern well, magnitude variation visible;
+          could show more diverse vector patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Wind flow is a plausible scenario, though the perfect mathematical
+          circular pattern is somewhat idealized
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid units from -3 to 3 with appropriate vector magnitudes
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has a helper function `magnitude_to_color` which violates strict
+          KISS principle, but overall structure is logical
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` (though data is deterministic anyway)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly, but also saves plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highcharts line series with null separators for discrete
+          arrows, proper chart configuration, and legend customization. Creative approach
+          to implement quiver plot in a library that does not natively support it.
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/letsplot.yaml b/plots/quiver-basic/metadata/letsplot.yaml
index 1c1645b5b3..174fc6839e 100644
--- a/plots/quiver-basic/metadata/letsplot.yaml
+++ b/plots/quiver-basic/metadata/letsplot.yaml
@@ -24,3 +24,172 @@ review:
   - Axis labels lack units (e.g., "X Position (units)" or context like "meters")
   - Grid lines could be slightly more subtle (alpha slightly lower)
   - Could explore additional lets-plot interactive features for the HTML output
+  image_description: The plot displays a rotation vector field with arrows arranged
+    in a 15x15 grid spanning from -3 to 3 on both axes. The arrows form a clear counter-clockwise
+    circular rotation pattern around the origin. Arrow colors transition from dark
+    blue (near origin, low magnitude) to yellow/gold (at edges, high magnitude) using
+    a gradient color scale. The title reads "Rotation Vector Field · quiver-basic
+    · letsplot · pyplots.ai". Axis labels are "X Position" and "Y Position". A "Magnitude"
+    legend on the right shows the color scale from 0 to 4. The plot uses a minimal
+    theme with subtle grid lines and has good visual balance.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend text are all clearly
+          readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or arrows; grid spacing is well-chosen
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arrows are well-sized and visible; some near-center arrows are small
+          but this correctly reflects their low magnitude
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-safe (avoids red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Position" and "Y Position" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well-placed; grid could be slightly more
+          subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field visualization using arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y positions and U/V vector components correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: grid positioning, direction arrows, magnitude
+          encoding via color'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Magnitude legend is accurate and clear
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title uses correct format: "quiver-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows rotation pattern well; could benefit from showing additional
+          field types
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic rotation field u=-y, v=x is a fundamental physics example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Grid coordinates -3 to 3 are sensible; 15x15 grid (225 arrows) is
+          within spec range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic output
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_segment with arrow(), scale_color_gradient, theme_minimal;
+          could leverage more lets-plot specific interactivity features
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/matplotlib.yaml b/plots/quiver-basic/metadata/matplotlib.yaml
index 299d6a4e7b..f2abc54343 100644
--- a/plots/quiver-basic/metadata/matplotlib.yaml
+++ b/plots/quiver-basic/metadata/matplotlib.yaml
@@ -23,3 +23,168 @@ review:
   - Clean, readable code following KISS principles
   weaknesses:
   - Axis labels lack units (though acceptable for dimensionless mathematical coordinates)
+  image_description: The plot displays a quiver (vector field) visualization showing
+    a circular rotation pattern on a 15×15 grid spanning from -3 to 3 on both axes.
+    Arrows represent vectors with direction indicating flow and color encoding magnitude
+    via the viridis colormap. Smaller arrows near the center appear in dark purple/blue
+    (low magnitude ~0-1), while larger arrows at the periphery are shown in yellow/green
+    (high magnitude ~3.5-4.2). The title "quiver-basic · matplotlib · pyplots.ai"
+    is displayed at the top. The X and Y axes are labeled "X Position" and "Y Position"
+    respectively. A vertical colorbar on the right shows "Vector Magnitude" ranging
+    from 0 to ~4.2. A subtle dashed grid is visible in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 39
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or arrows
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Arrows well-sized for grid density, clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis colormap which is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units (acceptable for dimensionless coordinates)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.3), colorbar well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid positions with U/V vector components correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: All spec features present including optional color encoding for magnitude
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All vectors visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar labeled "Vector Magnitude"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "quiver-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows circular rotation pattern (u=-y, v=x), varying magnitudes,
+          full 2D field
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic mathematical rotation field used in physics/fluid dynamics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 15×15 grid with -3 to 3 range is appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of quiver parameters (scale, width, headwidth, headlength,
+          headaxislength) with colormap integration
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/plotly.yaml b/plots/quiver-basic/metadata/plotly.yaml
index f36ee2014f..e47da5dc92 100644
--- a/plots/quiver-basic/metadata/plotly.yaml
+++ b/plots/quiver-basic/metadata/plotly.yaml
@@ -25,3 +25,180 @@ review:
   - Axis labels lack units
   - Zero lines are slightly more prominent than grid lines
   - X-axis range extends unnecessarily wide when data only spans -2 to 2
+  image_description: The plot displays a 15x15 grid of blue arrows (#306998) on a
+    white background, visualizing a circular rotation vector field. Each arrow represents
+    a vector with direction following the pattern u = -y, v = x, creating a counterclockwise
+    rotation around the origin. Colored circular markers at each arrow base encode
+    magnitude using the Viridis colorscale (purple at center for low magnitude, yellow
+    at corners for high magnitude). The title "Circular Flow Field · quiver-basic
+    · plotly · pyplots.ai" is centered at the top. Axis labels read "X Position" and
+    "Y Position". A vertical colorbar on the right shows magnitude values from ~0.3
+    to ~2.8. The grid is subtle with alpha transparency, and zero lines are visible
+    at x=0 and y=0.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and colorbar text are all clearly
+          readable at the target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; arrows are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arrows are visible and distinguishable; marker sizes are appropriate
+          for data density; slight deduction as some arrows near corners could be
+          slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colorscale is colorblind-safe; blue arrows have good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; slight empty space on left and right sides
+          of grid
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), colorbar well-placed; zero lines are slightly
+          more prominent than necessary
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field visualization with arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly assigned; u/v components properly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: grid positions, arrow directions, magnitude
+          encoding via color'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; axes appropriately scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Magnitude"
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Format follows convention but includes extra descriptor "Circular
+          Flow Field" before spec-id
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows circular rotation pattern well; demonstrates varying magnitudes;
+          minor deduction for not showing multiple flow patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Circular rotation is a plausible physics scenario (fluid dynamics,
+          electromagnetic fields); good choice but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are appropriate for the mathematical function; -2 to 2 range
+          is sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save; no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) even though data is deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, figure_factory, graph_objects)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" but also "plot.html" (acceptable for interactive
+          library)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses figure_factory.create_quiver for specialized quiver plot; adds
+          interactive hover templates; generates HTML for interactivity; could leverage
+          more plotly-specific features like animation
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/plotnine.yaml b/plots/quiver-basic/metadata/plotnine.yaml
index cf6b3fab88..9a22748a72 100644
--- a/plots/quiver-basic/metadata/plotnine.yaml
+++ b/plots/quiver-basic/metadata/plotnine.yaml
@@ -23,3 +23,174 @@ review:
   weaknesses:
   - Axis labels could include units or at least "(dimensionless)" notation for a mathematical
     field
+  image_description: The plot displays a 15x15 grid of arrows representing a counter-clockwise
+    rotation vector field (u = -y, v = x). The arrows are positioned on a coordinate
+    system ranging from approximately -4 to 4 on both axes. Arrow colors encode magnitude
+    using a gradient from blue (low magnitude near center) to yellow (high magnitude
+    at edges). The title "quiver-basic · plotnine · pyplots.ai" appears at the top.
+    Axis labels show "X Position" and "Y Position". A color legend labeled "Magnitude"
+    shows values from 0 to 4. The arrows clearly demonstrate the circular flow pattern
+    with smaller arrows near the origin and larger arrows at the periphery.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; arrows well-spaced on 15x15 grid
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arrows are clearly visible with good sizing; arrowheads are well-formed
+          and distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient (viridis-like) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Position", "Y Position") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with appropriate alpha; legend is well-placed but
+          could be slightly better positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field visualization using arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly mapped, u/v components correctly determine
+          arrow direction and length
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows vector field with direction and magnitude; color encodes magnitude
+          as optional feature
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Magnitude legend accurately represents arrow color encoding
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "quiver-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows rotation field pattern with varying magnitudes; arrows point
+          in correct rotational directions; magnitude increases from center to edges
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Rotation field (u=-y, v=x) is a classic mathematical example used
+          in physics/CFD
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid size (15x15=225 arrows) and coordinate range (-3 to 3) are appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses geom_segment with arrow() which is valid but plotnine doesn't
+          have a native quiver geom; this is a workaround approach rather than leveraging
+          distinctive plotnine/ggplot2 features
+  verdict: APPROVED
diff --git a/plots/quiver-basic/metadata/seaborn.yaml b/plots/quiver-basic/metadata/seaborn.yaml
index e6c0e7f658..88dafc4fbb 100644
--- a/plots/quiver-basic/metadata/seaborn.yaml
+++ b/plots/quiver-basic/metadata/seaborn.yaml
@@ -24,3 +24,176 @@ review:
     labeling)
   - Data context is purely mathematical rather than tied to a real-world application
     (e.g., wind field, fluid flow)
+  image_description: The plot displays a 15×15 grid of arrows representing a circular
+    rotation vector field (u = -y, v = x). Arrows are colored using the viridis colormap
+    based on vector magnitude—purple/dark blue near the center (low magnitude ~0)
+    transitioning to yellow at the corners (high magnitude ~4.2). The arrows point
+    tangentially around the origin, creating a clear counter-clockwise rotation pattern.
+    The title "quiver-basic · seaborn · pyplots.ai" appears at the top in large font.
+    Axes are labeled "X Position" and "Y Position" with tick marks from -3 to 3. A
+    colorbar on the right displays "Vector Magnitude" (0.0-4.2). The plot has a clean
+    white background with subtle dashed grid lines (alpha ~0.3). All arrows are uniformly
+    spaced and clearly visible with arrowheads.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt—all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements; arrows well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Arrows are visible with arrowheads; linewidth 2.5 works well for
+          density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis palette is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with equal aspect ratio; colorbar positioned
+          correctly
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed); colorbar serves as legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct quiver/vector field visualization using arrows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid positions with u/v vector components correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: grid, arrows with direction/length, color
+          encoding magnitude'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points from -3 to 3
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows magnitude scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "quiver-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows rotation pattern clearly with magnitude variation; arrows near
+          center are small (correctly showing low magnitude)
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Mathematical rotation field is plausible but not tied to a real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (-3 to 3) are sensible for demonstrating the mathematical
+          pattern
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) (though data is actually deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot with units parameter creatively to draw separate
+          arrow segments, and sns.set_theme for styling. However, seaborn doesn't
+          have a native quiver function, so the implementation cleverly works around
+          this limitation using line segments.
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/altair.yaml b/plots/radar-basic/metadata/altair.yaml
index a85aa48c7a..a482ce5e22 100644
--- a/plots/radar-basic/metadata/altair.yaml
+++ b/plots/radar-basic/metadata/altair.yaml
@@ -26,3 +26,175 @@ review:
   - Only shows single series when spec allows 1-3 series for comparison
   - Value range (70-90) is somewhat narrow; wider spread would better demonstrate
     strengths and weaknesses
+  image_description: |-
+    The plot displays a hexagonal radar chart showing employee performance scores across 6 competencies. The chart uses a cohesive blue color scheme (#306998) with the following elements:
+    - **Data polygon**: Blue filled area with ~25% transparency, clearly showing the performance profile
+    - **Axes**: 6 spokes radiating from center for Communication (85), Technical Skills (90), Teamwork (75), Problem Solving (88), Leadership (70), and Creativity (82)
+    - **Grid lines**: Hexagonal grid lines at 5 levels (20, 40, 60, 80, 100) with subtle gray color
+    - **Labels**: Category labels positioned outside the chart, value labels near each data point
+    - **Data points**: Solid blue circles at each vertex of the polygon
+    - **Title**: "radar-basic · altair · pyplots.ai" at top in proper format
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; title, labels, and values are clear. Font sizes
+          are appropriate (22pt for labels, 18pt for values, 28pt title).
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned outside the chart
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points and polygon clearly visible; markers sized well (size=500)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart is well-centered with balanced margins; fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Radar chart has no traditional X/Y axes; category labels are descriptive
+          but this criterion doesn't directly apply
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle (opacity=0.3), no legend needed for single series
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values as distances from center - correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has filled polygon with transparency, gridlines, axis labels, closed
+          polygon. Missing: could use distinct colors for multiple series (spec says
+          1-3 series, only 1 shown)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "radar-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows radar chart features well with 6 axes, variation in values
+          (70-90 range). Single series only, but spec allows 1-3.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review scenario is realistic and matches spec's
+          first application example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (70-90) are realistic performance scores but slightly narrow
+          range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → coordinate calculations
+          → layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used - all needed
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creative use of GeoJSON/geoshape for polygon fill, layered composition
+          typical of Altair. However, Altair doesn't have native radar chart support,
+          requiring manual polar-to-Cartesian conversion.
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/bokeh.yaml b/plots/radar-basic/metadata/bokeh.yaml
index f0b9d258d4..aa407105be 100644
--- a/plots/radar-basic/metadata/bokeh.yaml
+++ b/plots/radar-basic/metadata/bokeh.yaml
@@ -27,3 +27,179 @@ review:
     harder to read exact values
   - Legend is positioned in corner far from the chart - could be integrated better
   - No hover interactivity despite Bokeh being an interactive library
+  image_description: 'The plot displays a radar/spider chart comparing two employees
+    across 6 competency dimensions: Communication, Technical Skills, Teamwork, Problem
+    Solving, Leadership, and Creativity. Employee A is represented by a blue polygon
+    with blue markers, Employee B by a yellow/gold polygon with darker gold markers.
+    The chart has 5 concentric grid circles (representing 20, 40, 60, 80, 100 scale)
+    in light gray. Six axis lines extend from the center to each category label. The
+    title "radar-basic · bokeh · pyplots.ai" appears at the top center. A legend in
+    the top-right corner identifies the two employees. Both polygons are filled with
+    ~25% alpha transparency, allowing overlap visibility. The chart uses a square
+    1:1 aspect ratio with clean white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable at full size, title and category labels clear,
+          though title could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all category labels well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers appropriately sized (20px), lines clearly visible (4px width),
+          good alpha for fill
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-friendly, but could have slightly
+          more contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, radar chart fills space well, legend positioned
+          appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels (competency names) but no units/scale indicators
+          on the grid
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.6), legend readable but positioned far in
+          corner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values determine polygon vertices correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency (0.25), gridlines at intervals,
+          axis labels, distinct colors, legend, polygon closed
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Employee A and B
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "radar-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two series with different profiles (A excels at Technical Skills,
+          B at Teamwork/Leadership), demonstrates comparison capability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review is a perfect real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in 70-90 range are realistic for employee scores, though range
+          is somewhat narrow
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed as data is hardcoded),
+          but best practice would include np.random.seed for future modifications
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Legend with LegendItem, patch for filled polygons.
+          Could leverage more Bokeh-specific features like hover tooltips or interactive
+          callbacks
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/highcharts.yaml b/plots/radar-basic/metadata/highcharts.yaml
index 09d4702e71..a0ea4ceb87 100644
--- a/plots/radar-basic/metadata/highcharts.yaml
+++ b/plots/radar-basic/metadata/highcharts.yaml
@@ -24,3 +24,174 @@ review:
   weaknesses:
   - Radial axis labels could include a unit context like Performance Score (0-100)
     though current implementation is acceptable
+  image_description: 'The radar chart displays a hexagonal spider/web chart with 6
+    axes: Communication (top), Technical Skills (upper-right), Teamwork (lower-right),
+    Problem Solving (bottom), Leadership (lower-left), and Creativity (upper-left).
+    Two filled polygons represent Employee A (blue #306998) and Employee B (yellow
+    #FFD43B), each with ~25% transparency allowing overlap visibility. Employee A
+    shows higher values for Technical Skills (~95) and Problem Solving (~90), while
+    Employee B excels in Communication (~90) and Teamwork (~95). The chart has polygon-interpolated
+    gridlines at 0, 20, 40, 60, 80 intervals. The title "radar-basic · highcharts
+    · pyplots.ai" appears at the top in bold. The legend is centered at the bottom
+    with circle markers for both employees.'
+  criteria_checklist:
+    visual_quality:
+      score: 39
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text clearly readable with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, axis labels well-positioned around the chart
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers visible, line width appropriate, good polygon fill
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart well-centered with 75% pane size, good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive category names but radial axis lacks unit context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle gray gridlines at alpha 0.15, legend well-placed at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart using Highcharts polar mode
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values on radial scale (0-100)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency, gridlines at 20 intervals, labeled
+          axes, distinct colors, legend, closed polygon
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-100 scale visible, all data within range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Employee A and Employee B correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: radar-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows comparison of two employees with contrasting profiles (technical-focused
+          vs soft-skills-focused)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance metrics are a common real-world use case
+          from the spec's applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 0-100 performance scale with realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → selenium screenshot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data arrays, no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (highcharts_core, selenium, urllib, tempfile, time,
+          Path)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Polar chart mode, AreaSeries for filled radar, gridLineInterpolation
+          polygon, pane settings, fillOpacity control, highcharts-more.js for radar
+          support
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/letsplot.yaml b/plots/radar-basic/metadata/letsplot.yaml
index 382b079cd2..4cd743cfde 100644
--- a/plots/radar-basic/metadata/letsplot.yaml
+++ b/plots/radar-basic/metadata/letsplot.yaml
@@ -26,3 +26,179 @@ review:
     center
   - Could leverage lets-plot interactive features (tooltips showing exact values)
     since HTML output is generated
+  image_description: 'The plot shows a radar/spider chart comparing two employees
+    (Alice and Bob) across 6 competency categories: Creativity, Leadership, Communication,
+    Technical, Problem Solving, and Teamwork. Alice''s polygon is rendered in Python
+    blue (#306998) with semi-transparent fill, while Bob''s is in Python yellow (#FFD43B).
+    Both polygons have clear vertices marked with points and solid connecting lines.
+    The chart features dashed concentric circular gridlines at intervals of 20, 40,
+    60, 80, and 100, with radial spoke lines extending to each category axis. Category
+    labels are positioned at the outer edge of each axis. Scale values (20-100) are
+    displayed along the right side of the top spoke. A legend on the right side identifies
+    the two employees. The title "radar-basic · letsplot · pyplots.ai" appears in
+    the top-left corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, category labels, and legend text are all clearly readable
+          at the output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points and lines are visible and appropriately sized; polygons use
+          good alpha for overlap visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe and have excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas for symmetric radar; slight imbalance with
+          legend placement creating more whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for radar chart (no X/Y axes), but category labels are descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha; legend well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to axes, values to radial distance
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with alpha ~0.25, gridlines at 20/40/60/80/100, axis
+          labels, distinct colors, legend, closed polygons
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Alice and Bob
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "radar-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows comparison between two series with different strengths; Alice
+          excels at Creativity/Technical while Bob excels at Communication/Teamwork.
+          Could show more dramatic variation to demonstrate radar chart capabilities
+          better
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review is a perfect real-world application mentioned
+          in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values are realistic performance scores (60-90 range)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" but also saves plot.html (minor extra output)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar of graphics (geom_polygon, geom_line, geom_point,
+          geom_text), manual color scales, and theme customization. Good use of lets-plot
+          idioms but no advanced features like interactivity or tooltips that lets-plot
+          offers.
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/matplotlib.yaml b/plots/radar-basic/metadata/matplotlib.yaml
index 7fb049489e..783295c12e 100644
--- a/plots/radar-basic/metadata/matplotlib.yaml
+++ b/plots/radar-basic/metadata/matplotlib.yaml
@@ -28,3 +28,177 @@ review:
     demonstrate radar chart capabilities
   - Basic matplotlib polar features used; advanced features like custom tick formatters
     or spine styling not utilized
+  image_description: 'The plot displays a radar/spider chart comparing two employees
+    (Senior Developer in blue and Team Lead in yellow) across 6 competency dimensions:
+    Technical Skills, Communication, Creativity, Leadership, Problem Solving, and
+    Teamwork. The chart uses a polar coordinate system with concentric gridlines at
+    20, 40, 60, 80, and 100. Each employee''s values are connected to form filled
+    polygons with transparency (alpha ~0.25). The Senior Developer (blue) shows higher
+    scores in Technical Skills and Problem Solving, while the Team Lead (yellow) excels
+    in Teamwork and Creativity. The title follows the correct format at the top. Axis
+    labels are placed at the outer edges, and a legend is positioned in the upper
+    right outside the plot area.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 18pt, radial labels at 14pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all axis labels clearly positioned around the
+          perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at 12pt with linewidth=3 are well-sized; minor deduction
+          for markers slightly large relative to data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and Yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 12x12 figure, radar chart fills canvas well, legend positioned
+          outside without overlapping data
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Radar charts don't have traditional axis labels with units; categories
+          are descriptive but no units applicable
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.4 with dashed lines is subtle, legend well-placed
+          outside chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values as polygon vertices - correct mapping
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with alpha ~0.25, gridlines at 20/40/60/80/100, axis
+          labels at outer edge, distinct colors, legend, polygon closed
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 0-100 scale as recommended in spec, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series names correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Employee Performance · radar-basic · matplotlib · pyplots.ai"
+          follows required format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows two series for comparison with different profiles; could show
+          more varied patterns (e.g., one very weak category)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review is a classic, realistic radar chart application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 65-92 are realistic performance scores on 0-100 scale
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib polar API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses matplotlib's polar subplot correctly; could leverage additional
+          features like custom radial axis formatting or fill_between alternatives
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/plotly.yaml b/plots/radar-basic/metadata/plotly.yaml
index e0745eeaef..f33c605011 100644
--- a/plots/radar-basic/metadata/plotly.yaml
+++ b/plots/radar-basic/metadata/plotly.yaml
@@ -27,3 +27,177 @@ review:
   - Data values are clustered (72-92); wider spread would demonstrate the chart ability
     to show strengths and weaknesses more dramatically
   - Canvas utilization could be improved - radar chart has generous margins
+  image_description: 'The radar chart displays employee performance across 6 competencies:
+    Technical Skills (highest, ~92), Communication (~85), Creativity (~80), Leadership
+    (~72), Problem Solving (~88), and Teamwork (~78). The polygon is filled with a
+    translucent light blue (rgba 48,105,152 @ 0.25 alpha) and outlined with a darker
+    blue (#306998) line with markers at each vertex. The title "radar-basic · plotly
+    · pyplots.ai" is centered at the top. A legend labeled "Performance Score" appears
+    in the upper right. Radial gridlines are visible at 20, 40, 60, 80, 100 intervals.
+    All axis labels are clearly readable around the perimeter.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, all labels well-spaced around the radar
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 12 are clearly visible, line width 3 is appropriate,
+          fill alpha 0.25 works well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single series uses blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Plot is well-centered but margins are generous; the radar could use
+          slightly more canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Radar charts don't have traditional X/Y axis labels; category labels
+          are descriptive but no units (performance scores are unitless, acceptable
+          for this context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Gridlines at alpha 0.2 are subtle, legend well-positioned in upper
+          right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type using Scatterpolar
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories mapped to theta, values to r correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygon with transparency (0.25), gridlines at intervals,
+          labeled axes, legend present, polygon closed
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radial axis range 0-100 shows all data correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend label "Performance Score" accurately describes the data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: `radar-basic · plotly · pyplots.ai`'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows single series with variation across competencies; spec mentions
+          1-3 series for comparison, only 1 shown (basic implementation is acceptable
+          but doesn't showcase comparison feature)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review is a perfect, real-world application
+          mentioned in the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 72-92 are realistic performance scores; could show more range
+          variation (e.g., one lower score to demonstrate the full 0-100 scale better)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses hardcoded deterministic data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only `plotly.graph_objects` imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Plotly's Scatterpolar correctly, generates interactive HTML
+          output; could leverage more Plotly features like hover templates or animations
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/plotnine.yaml b/plots/radar-basic/metadata/plotnine.yaml
index 8ea7bb5fb5..8bee6f78cb 100644
--- a/plots/radar-basic/metadata/plotnine.yaml
+++ b/plots/radar-basic/metadata/plotnine.yaml
@@ -26,3 +26,178 @@ review:
     at full resolution
   - No value labels on gridlines (e.g., 20, 40, 60, 80, 100) to help readers gauge
     values
+  image_description: 'The plot displays a radar/spider chart comparing two employees
+    (Alice and Bob) across six performance dimensions: Creativity, Leadership, Communication,
+    Technical, Problem Solving, and Teamwork. The chart features a white background
+    with dashed circular gridlines at regular intervals. Alice''s data is shown in
+    Python blue (#306998) and Bob''s in Python yellow (#FFD43B). Both polygons are
+    filled with transparency (~0.25 alpha), allowing overlapping areas to be visible.
+    Each vertex is marked with a colored point, and the category labels are positioned
+    around the outer edge of the chart. The legend is positioned to the right showing
+    "Employee" with Alice (blue) and Bob (yellow). The title "radar-basic · plotnine
+    · pyplots.ai" appears at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt is readable, category labels at 14pt are readable but
+          could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points sized at 5, lines at 1.5 width, polygons with 0.25 alpha -
+          all clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow have excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good square layout, radar chart fills canvas well, slight asymmetry
+          due to legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Radar chart has no traditional axes with units (expected for this
+          chart type, and the spec doesn't require units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Dashed gridlines at 0.7 alpha are subtle, legend well placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to axes, values determine distance from
+          center
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency, gridlines at 20/40/60/80/100,
+          axis labels at outer edge, distinct colors with legend, polygon closed properly
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values (60-90) visible within 0-100 scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Alice and Bob
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "radar-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows two series with different strengths, but could show more varied
+          differences (e.g., one person clearly weaker in a category)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review scenario is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in 60-90 range are realistic performance scores on 0-100 scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → coordinate transformation → plot →
+          save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but could add seed for
+          future modifications
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics (ggplot, aes, geom_polygon, geom_line,
+          geom_point, geom_text, theme), but radar charts are not a native strength
+          of plotnine - required manual coordinate transformation
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/pygal.yaml b/plots/radar-basic/metadata/pygal.yaml
index 9d13fc5afe..1052a18e99 100644
--- a/plots/radar-basic/metadata/pygal.yaml
+++ b/plots/radar-basic/metadata/pygal.yaml
@@ -25,3 +25,180 @@ review:
   - Legend placement in upper-left corner creates visual imbalance
   - Axis label rotation for some categories reduces readability compared to horizontal
     text
+  image_description: 'The radar chart displays a hexagonal spider plot with 6 axes
+    representing employee competencies: Communication (top), Technical Skills (upper-left),
+    Teamwork (lower-left), Problem Solving (bottom), Leadership (lower-right), and
+    Creativity (upper-right). Two filled polygons are rendered with transparency -
+    blue (#306998) for Employee A and yellow/gold (#FFD43B) for Employee B. The blue
+    polygon shows higher values for Technical Skills (~92) and Communication (~85),
+    while the yellow polygon shows strength in Leadership (~85) and Teamwork (~90).
+    Gridlines are visible at intervals of 20 (0, 20, 40, 60, 80). The title "radar-basic
+    · pygal · pyplots.ai" appears at the top center. A legend in the upper-left corner
+    identifies both employees. The polygons use ~25% opacity allowing overlap areas
+    to be visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable; some axis labels (Technical Skills,
+          Creativity, Leadership, Teamwork) are rotated at angles which slightly impacts
+          readability
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Polygons well-sized with good dot markers (size 8), excellent use
+          of canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, distinct contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, but legend in far upper-left corner creates
+          slight imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels for competencies but no unit indication (though
+          percentages are implied)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid visible with dotted lines, legend functional but small relative
+          to chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to axes, values correctly plotted
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple series, filled polygons, transparency, gridlines, legend
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 0-100 scale shown appropriately, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Employee A and Employee B
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "radar-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows comparison between two employees with varied strengths/weaknesses
+          across all 6 dimensions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review is a classic, real-world radar chart
+          use case from the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in 68-92 range are realistic for employee competency scores
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → chart → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed used, but data is deterministic (hardcoded values),
+          so it should still receive points
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Radar chart, Style customization, fill mode, and dual
+          PNG/HTML output, but could leverage more pygal-specific features like tooltips
+          or custom formatting
+  verdict: APPROVED
diff --git a/plots/radar-basic/metadata/seaborn.yaml b/plots/radar-basic/metadata/seaborn.yaml
index d59c5b60be..57e90b9cc3 100644
--- a/plots/radar-basic/metadata/seaborn.yaml
+++ b/plots/radar-basic/metadata/seaborn.yaml
@@ -25,3 +25,183 @@ review:
   - Radar charts lack units notation - could add "(Score 0-100)" in subtitle or annotation
   - The two employee profiles could show more contrasting values to better demonstrate
     radar chart comparison capability
+  image_description: 'The plot displays a radar/spider chart comparing two employees
+    (Senior Developer in blue, Team Lead in yellow/gold) across 6 competency dimensions:
+    Technical Skills, Communication, Creativity, Problem Solving, Leadership, and
+    Teamwork. The chart uses a polar coordinate system with concentric gridlines at
+    20, 40, 60, 80, and 100. Both series are shown as filled polygons with transparency
+    (~0.25 alpha), allowing overlap visibility. Data points are marked with circular
+    markers at each vertex. The title "radar-basic · seaborn · pyplots.ai" appears
+    at the top in bold black text. A legend in the upper right corner identifies the
+    two series. The chart uses a square 1:1 aspect ratio with good canvas utilization.
+    Category labels are positioned at the outer edge of each axis and are clearly
+    readable.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, category labels at 22pt, tick labels at 18pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at s=400 with linewidth=4, perfect for 6 data points
+          per series
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind palette, blue and gold/yellow are easily
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format appropriate for radar, plot fills canvas well, balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: No axis labels with units (radar charts typically don't have traditional
+          axis labels, but score units could be noted)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3, legend well-placed in upper right with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on angular axis, values on radial axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with alpha 0.25, gridlines at 20/40/60/80/100, axis
+          labels at outer edge, distinct colors with legend, closed polygons
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 0-100 scale as recommended, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Senior Developer and Team Lead
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "radar-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows comparison between two profiles with contrasting strengths,
+          but profiles could show more dramatic differences to better demonstrate
+          radar chart utility
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance review is a perfect, relatable use case from
+          spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (65-92 range), though slightly compressed -
+          could use fuller range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → setup → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic (hardcoded values); minor
+          deduction for not including np.random.seed(42) even though it is not strictly
+          needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: matplotlib, numpy, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn 0.13+ API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.set_theme() and sns.scatterplot() for data points, plus
+          colorblind palette. However, radar charts are not a seaborn specialty -
+          the implementation correctly falls back to matplotlib polar projection for
+          the core chart while leveraging seaborn for styling and the data point markers
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/altair.yaml b/plots/radar-multi/metadata/altair.yaml
index 750c1e22e9..ed0feaf33b 100644
--- a/plots/radar-multi/metadata/altair.yaml
+++ b/plots/radar-multi/metadata/altair.yaml
@@ -27,3 +27,179 @@ review:
   - The filled polygons use mark_line with filled=True which may not render as proper
     filled areas in all renderers - consider using mark_area or geoshape for more
     reliable polygon fills
+  image_description: 'The plot displays a multi-series radar chart comparing three
+    products (A, B, C) across six dimensions: Price, Quality, Durability, Support,
+    Features, and Design. The chart uses a hexagonal grid structure with concentric
+    gridlines at intervals of 20, 40, 60, 80, and 100. Product A is rendered in Python
+    blue (#306998), Product B in yellow (#FFD43B), and Product C in green (#4CAF50).
+    Each series is shown as a filled polygon with ~20% opacity and solid colored outlines
+    with circular markers at each vertex. The legend is positioned in the top-right
+    corner with a bordered box. Axis labels are bold and positioned outside the outermost
+    gridline. Grid value labels (20, 40, 60, 80, 100) appear along the vertical spoke.
+    The title "radar-multi · altair · pyplots.ai" is centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt bold, legend at 18-20pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers visible at size 200, outlines at strokeWidth 3, good polygon
+          fills with transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe (avoids red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas, plot well-centered, but slightly more
+          whitespace at bottom than top
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels (Price, Quality, etc.) but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (opacity 0.6), legend well-placed but could be closer
+          to the chart
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-series radar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to axes, values to radial distance
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency, distinct colors, legend, gridlines,
+          axis labels, closed polygons all present; spec suggests alpha ~0.2-0.3 and
+          the implementation uses 0.2, which is within that range
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "radar-multi · altair · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across products with different strengths (A=Durability,
+          B=Support/Quality, C=Features/Design), but could show more extreme differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product comparison is a realistic scenario matching spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 60-95 on 0-100 scale are realistic for product ratings
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → transformations → chart layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded) and no random data is used, so no
+          np.random.seed is needed; the code imports numpy but only uses it for angle
+          calculations, which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used and all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative layering system, proper use
+          of mark_line with filled=True for polygon fills, tooltips for interactivity,
+          alt.Scale for custom color domains, proper encoding types (Q, N), HTML export
+          for interactivity
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/bokeh.yaml b/plots/radar-multi/metadata/bokeh.yaml
index c83b1ddcb4..8a71277246 100644
--- a/plots/radar-multi/metadata/bokeh.yaml
+++ b/plots/radar-multi/metadata/bokeh.yaml
@@ -24,3 +24,180 @@ review:
   - Legend is positioned outside the main plot area with some visual separation
   - Color palette could be more colorblind-friendly (blue-yellow combination)
   - Grid value labels are slightly small relative to axis labels
+  image_description: 'The plot shows a multi-series radar chart comparing three products
+    (Product A, B, C) across 6 attributes: Performance, Ease of Use, Price Value,
+    Support, Features, and Reliability. The chart uses a square 1:1 aspect ratio with
+    circular gridlines at intervals of 20, 40, 60, 80, 100. Product A is shown in
+    blue (#306998), Product B in yellow (#FFD43B), and Product C in a coral/red color
+    (#E57373). Each series is rendered as a filled polygon with transparency (~0.25
+    alpha) and distinct colored outlines. The polygons overlap visually, showing different
+    strengths for each product. Axis labels are positioned at the outer edges, and
+    a legend is placed on the right side identifying each product. The title "radar-multi
+    · bokeh · pyplots.ai" appears at the top center.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and grid values are clearly readable. Font sizes
+          are appropriate for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced around the
+          perimeter.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Polygons are well-sized with appropriate alpha transparency allowing
+          overlapping regions to be distinguished. Vertex markers are visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral provide reasonable contrast, though blue-yellow
+          could be challenging for some colorblind users.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with the radar chart centered. Legend is appropriately
+          placed on the right side, though slightly separated from the main chart.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive category labels (Performance, Reliability, Features,
+          Support, Price Value, Ease of Use).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Gridlines are visible and subtle. Legend is functional but placed
+          outside the plot area with some separation.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-series radar/spider chart.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to radial axes, values mapped to distance
+          from center.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: filled polygons with transparency, distinct
+          colors, legend, gridlines, axis labels, closed polygons with fill and outline.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 scale.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies each product.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "radar-multi · bokeh · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows differentiation across products with varying strengths/weaknesses.
+          Each product excels in different areas.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product comparison is a realistic and common use case for radar charts.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable (65-95 range), though somewhat clustered. Could
+          show more range variation.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation requiring seed).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs both plot.png and plot.html which is correct for bokeh.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's figure, ColumnDataSource for labels, LabelSet, Legend
+          with customization, and export_png/save for HTML output. Could leverage
+          more interactive features.
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/highcharts.yaml b/plots/radar-multi/metadata/highcharts.yaml
index c2a01e0de8..99a96c17b6 100644
--- a/plots/radar-multi/metadata/highcharts.yaml
+++ b/plots/radar-multi/metadata/highcharts.yaml
@@ -29,3 +29,15 @@ review:
   - Data differentiation could be more dramatic - some values cluster in the 50-80
     range
   - Uses container.screenshot() instead of driver.save_screenshot() per library rules
+  image_description: 'The plot displays a multi-series radar chart with a white background.
+    The title "Product Comparison · radar-multi · highcharts · pyplots.ai" is displayed
+    at the top in bold black text. The radar chart has 6 axes radiating from the center:
+    Price, Quality, Durability, Support, Features, and Ease of Use. Four overlapping
+    polygons represent four products: Product A (Premium) in blue (#306998), Product
+    B (Budget) in yellow (#FFD43B), Product C (Balanced) in purple (#9467BD), and
+    Product D (Feature-Rich) in cyan (#17BECF). Each polygon has a semi-transparent
+    fill (~0.2 opacity) with visible outlines and data point markers. The gridlines
+    form concentric polygons at intervals of 0, 20, 40, 60, 80 (with subtle gray coloring).
+    A horizontal legend at the bottom shows all four products with their colors and
+    labels.'
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/letsplot.yaml b/plots/radar-multi/metadata/letsplot.yaml
index 68b87f80aa..b24b43367a 100644
--- a/plots/radar-multi/metadata/letsplot.yaml
+++ b/plots/radar-multi/metadata/letsplot.yaml
@@ -24,3 +24,182 @@ review:
   - Grid value labels could be larger for better readability at scale
   - Category axis labels could include units (e.g., Score 0-100) for clarity
   - Color palette includes red-green which may not be ideal for all colorblind users
+  image_description: 'The plot displays a multi-series radar chart comparing 4 smartphones
+    (Galaxy S24, iPhone 15, Pixel 8, OnePlus 12) across 6 attributes: Performance,
+    Display, Camera, Battery, Price Value, and Storage. The chart features a hexagonal
+    structure with concentric gridlines at 20, 40, 60, 80, and 100 intervals (dashed
+    gray). Four distinctly colored polygons overlay each other with transparency (~0.2
+    alpha): blue (Galaxy S24), yellow (iPhone 15), red (Pixel 8), and green (OnePlus
+    12). Each polygon has thick colored outlines and circular markers at vertices.
+    Category labels are positioned at the outer edge of each axis. A legend on the
+    right identifies each product by color. The title follows the correct format:
+    "Smartphone Comparison · radar-multi · letsplot · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and category labels are clearly readable; grid value labels
+          (20, 40, 60, 80, 100) are legible but slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Polygons with transparency allow overlapping areas to be visible;
+          markers are appropriately sized; lines are thick enough
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color contrast between series; blue/yellow/red/green palette
+          works well, though red-green combination could be problematic for some colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 1:1 aspect ratio is appropriate for radar chart; plot fills
+          canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Category labels are descriptive but lack units (e.g., "Score (0-100)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Dashed gridlines are subtle (alpha 0.6); legend is well-placed on
+          the right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-series radar chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values as radial distance, series as colored
+          polygons
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: transparent fills (~0.2), distinct colors,
+          legend, gridlines, axis labels, closed polygons, fill + outline'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show 0-100 scale with proper gridlines
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 products by color
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Smartphone Comparison · radar-multi · letsplot
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 products across 6 attributes with varied values; demonstrates
+          overlapping areas well; could show more extreme differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Smartphone comparison is a real, relatable scenario; attributes are
+          meaningful for phone comparison
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for smartphone ratings (65-95 range); all products
+          competitive which is realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → coordinate transformation → plot layers
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed as data is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports from lets_plot and standard libraries
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets_plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves as plot.png but also outputs HTML; minor: uses path="." parameter'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with proper geom layering; could leverage
+          more lets-plot specific features like tooltips or interactive elements for
+          HTML export
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/matplotlib.yaml b/plots/radar-multi/metadata/matplotlib.yaml
index 6a18f2c19a..e9f9626b1e 100644
--- a/plots/radar-multi/metadata/matplotlib.yaml
+++ b/plots/radar-multi/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
     or advanced polar customization)
   - Radial tick labels (14pt gray) could be slightly larger for better readability
     at full resolution
+  image_description: 'The radar chart displays a multi-series comparison of three
+    products (A, B, C) across six attributes: Performance, Battery Life, Camera, Display,
+    Build Quality, and Value. Product A is shown in blue (#306998), Product B in yellow
+    (#FFD43B), and Product C in pink (#E377C2). Each product forms a filled polygon
+    with 25% transparency, allowing overlap visibility. The chart has concentric circular
+    gridlines at 20, 40, 60, 80, and 100 intervals. Category labels are positioned
+    at the outer edge of each axis in bold black text. A legend is placed in the upper
+    right. The title follows the required format at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 18pt bold, radial labels at 14pt -
+          all readable but radial tick labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Polygons clearly visible, markers (size 10) and lines (width 3) appropriate,
+          alpha 0.25 allows overlap visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/pink is generally colorblind-safe but pink might be confused
+          with red by some
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 12x12 format perfect for radar, plot fills canvas well, legend
+          well positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Category labels are descriptive but no units (not applicable for
+          this type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.4 is good, legend well placed but could have slightly
+          better framealpha
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart with multiple series
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values radially, series distinguished by color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency, distinct colors, legend, gridlines
+          at intervals, closed polygons, both fill and outline
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 0-100 scale shown completely
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three products
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: {title} · {spec-id} · {library} · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied product profiles: A excels at Camera/Display, B at
+          Battery/Build, C at Performance/Value - demonstrates comparison use case
+          perfectly'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product comparison across tech attributes is a realistic, relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 60-95 on 0-100 scale are realistic product scores
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib polar API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic matplotlib polar plot, no distinctive features like custom
+          projections or special styling
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/plotly.yaml b/plots/radar-multi/metadata/plotly.yaml
index 34398f97b9..c1b068b723 100644
--- a/plots/radar-multi/metadata/plotly.yaml
+++ b/plots/radar-multi/metadata/plotly.yaml
@@ -25,3 +25,174 @@ review:
   - Legend margin (r=200) creates excessive whitespace on the right side of the plot
   - Could leverage Plotly-specific interactive features like custom hover templates
     to show exact values
+  image_description: 'The plot displays a multi-series radar chart comparing three
+    products across six attributes. The chart shows three overlapping polygons with
+    semi-transparent fills: Product A (Premium) in blue/slate color, Product B (Budget)
+    in yellow, and Product C (Pro) in teal/cyan. The six axes are labeled: Performance,
+    Reliability, Price Value, Support, Ease of Use, and Features. Concentric gridlines
+    are visible at 20, 40, 60, 80, and 100 intervals. The title "radar-multi · plotly
+    · pyplots.ai" appears centered at the top. A legend on the right side identifies
+    each product series with colored markers and labels. The background is white with
+    subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (32pt), axis labels are clearly readable (20pt), tick
+          labels are appropriately sized (16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text elements are well-spaced with no overlapping
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Polygons have appropriate transparency (0.25 alpha), line widths
+          (3px), and marker sizes (10px) for clear visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and teal are colorblind-safe and provide good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though legend placement creates some whitespace
+          on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels for all axes (Performance, Reliability, etc.)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle, but legend has excessive margin space (r=200) creating
+          imbalance
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-series radar/spider chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on angular axis, values on radial axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency, distinct colors, legend, gridlines
+          at 20/40/60/80/100, closed polygons
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Radial axis shows 0-100 range correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three products
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "radar-multi · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows products with different strengths (premium overall, budget
+          value, pro performance), though differences could be more pronounced
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product comparison is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are on 0-100 scale as recommended, though some values cluster
+          in 60-95 range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values, no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported, which is used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Scatterpolar correctly but doesn't leverage Plotly's interactive
+          features in a unique way (hover templates, animations, etc.)
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/plotnine.yaml b/plots/radar-multi/metadata/plotnine.yaml
index 86e864c0f2..88c1fcee0d 100644
--- a/plots/radar-multi/metadata/plotnine.yaml
+++ b/plots/radar-multi/metadata/plotnine.yaml
@@ -26,3 +26,181 @@ review:
     np.random.seed(42)
   - Color palette includes red and green which can be problematic for red-green colorblind
     users
+  image_description: 'The plot displays a multi-series radar chart comparing 4 products
+    across 6 attributes (Support, Durability, Quality, Price, Design, Features). Four
+    distinct colored polygons are shown: blue (Product A), yellow (Product B), red
+    (Product C), and green (Product D). Each polygon is filled with ~0.2 alpha transparency,
+    allowing overlapping areas to be visible. Circular dashed gray gridlines indicate
+    value levels. Axis labels are positioned around the perimeter of the chart. The
+    title "radar-multi · plotnine · pyplots.ai" appears at the top. A legend on the
+    right identifies each product by color with marker symbols. Points are clearly
+    marked at each vertex with connecting lines outlining each polygon.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and readable, axis labels are clear, legend text is
+          appropriately sized. Slightly under ideal size for 4800px output.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points and lines are visible, polygons distinguishable. Points could
+          be slightly larger for the data density.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Red/Green palette is reasonably distinguishable, though
+          red-green could be problematic for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square aspect ratio is appropriate for radar chart, good use of canvas
+          space, though legend could be positioned closer to the plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Clear descriptive labels (Support, Durability, Quality, Price, Design,
+          Features)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and low alpha, legend is well-placed
+          and readable
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct radar/spider chart type with multiple series
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories mapped to angles, values mapped to radial distance correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has filled polygons with transparency, distinct colors, legend, gridlines,
+          axis labels, closed polygons
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the 0-100 scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 products
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "radar-multi · plotnine · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 distinct product profiles with varying strengths across 6
+          attributes, demonstrating comparison capability well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product comparison across Price, Quality, Durability, Support, Features,
+          Design is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in 60-95 range on 0-100 scale are realistic and create meaningful
+          visual differentiation
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → coordinate conversion → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed set (data is deterministic, but best practice would
+          include it)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics (ggplot, aes, geom_polygon, geom_line,
+          geom_point, geom_text) effectively. Creative workaround for radar chart
+          by converting to Cartesian coordinates since plotnine doesn't have native
+          polar coordinates. Could use more advanced theming.
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/pygal.yaml b/plots/radar-multi/metadata/pygal.yaml
index 1ab685120d..5f832b911a 100644
--- a/plots/radar-multi/metadata/pygal.yaml
+++ b/plots/radar-multi/metadata/pygal.yaml
@@ -27,3 +27,178 @@ review:
     reduces readability slightly
   - Yellow series may have reduced visibility against the white background compared
     to the blue and red series
+  image_description: 'The plot displays a multi-series radar chart with 6 axes arranged
+    in a hexagonal pattern: Communication (top), Problem Solving (top-right), Teamwork
+    (right), Creativity (bottom), Leadership (bottom-left), and Technical (left).
+    Three teams are visualized as overlapping filled polygons with transparency: Alpha
+    Team (blue, #306998), Beta Team (yellow, #FFD43B), and Gamma Team (red/pink, #E74C3C).
+    The chart has a white background with gridlines at intervals of 20 from 0-100.
+    Data points are marked with colored dots at each vertex. The legend is positioned
+    in the top-left corner showing team names with colored squares. The title "radar-multi
+    · pygal · pyplots.ai" appears at the top center.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and legend text are clearly readable; axis labels are legible
+          but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Polygons are well-sized with good transparency (0.25), dots are visible
+          (size 12)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red provide good contrast; yellow may be slightly
+          less visible against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 3600x3600 canvas is well-utilized; radar chart is centered
+          and properly sized
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Communication", "Technical", etc.) but no
+          units shown (N/A for this type)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is present and not too dominant; legend placement in corner
+          works but could be better integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-series radar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values as polygon vertices, series as distinct
+          polygons
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Filled polygons with transparency, distinct colors, legend, gridlines,
+          axis labels, closed polygons, both fill and outline
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Range set to (0, 100) as spec recommends
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Team names correctly displayed in legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 3 series with 6 dimensions; demonstrates overlapping areas
+          well; could show more variation in strengths/weaknesses between teams
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Team skill assessment is a realistic and comprehensible scenario
+          mentioned in spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are in 65-92 range, realistic for skill assessments on 0-100
+          scale
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style are imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses pygal.Radar() with custom Style, but could leverage more pygal-specific
+          features like tooltips configuration or value formatting
+  verdict: APPROVED
diff --git a/plots/radar-multi/metadata/seaborn.yaml b/plots/radar-multi/metadata/seaborn.yaml
index a8ecec9290..1d2e501884 100644
--- a/plots/radar-multi/metadata/seaborn.yaml
+++ b/plots/radar-multi/metadata/seaborn.yaml
@@ -24,3 +24,185 @@ review:
   - Data values are somewhat tightly clustered (65-90 range) which reduces the visual
     impact of comparing products - more dramatic variation would better showcase the
     radar chart strengths
+  image_description: 'The plot displays a multi-series radar chart with a square (1:1)
+    aspect ratio. Three overlapping polygons represent Product A (blue/Python Blue
+    #306998), Product B (yellow #FFD43B), and Product C (red/coral #E74C3C). The chart
+    has 6 axes radiating from the center: Performance, Value, Support, Features, Usability,
+    and Reliability. Each axis is clearly labeled at the outer edge with readable
+    black text. The radial gridlines show scale markers at 20, 40, 60, 80, and 100
+    in gray. The title "radar-multi · seaborn · pyplots.ai" appears at the top in
+    bold. A legend in the upper right corner identifies all three products. The polygons
+    use fill with ~25% transparency and solid outlines with markers at data points.
+    The chart uses a white background with subtle dashed gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis labels 18pt, radial ticks 14pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, axis labels well-spaced around the
+          perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Polygons clearly visible with good alpha (0.25), markers sized well
+          (10), lines thick (3) - slight deduction as some overlapping polygon regions
+          are harder to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, red are highly distinguishable including for colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format perfect for radar, plot fills canvas well, legend positioned
+          cleanly
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Axis labels are descriptive (Performance, Reliability, etc.) but
+          no units shown (though 0-100 scale is implied for ratings)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle (alpha 0.3-0.4), legend well placed
+          but could be closer to chart
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct multi-series radar chart with overlapping polygons
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on axes, values as radial distance, series as different
+          polygons
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: filled polygons with transparency, distinct
+          colors, legend, gridlines at intervals, axis labels, closed polygons, both
+          fill and outline'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-100 with all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three products
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 3 products across 6 dimensions with varied profiles - each
+          product has different strengths/weaknesses. Could show more dramatic variation
+          to better demonstrate radar chart's comparison power
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product comparison across quality dimensions (Performance, Reliability,
+          Usability, Features, Support, Value) is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in 65-90 range are realistic product scores, though tighter
+          clustering reduces visual impact
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) though data is actually static
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, seaborn used - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style and sns.set_context for global styling, but core
+          radar chart is built with matplotlib's polar projection. Seaborn doesn't
+          have native radar chart support, so this is appropriate use of seaborn for
+          styling while leveraging matplotlib for the specialized polar plot.
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/altair.yaml b/plots/raincloud-basic/metadata/altair.yaml
index 43e3e28381..fe71facb5c 100644
--- a/plots/raincloud-basic/metadata/altair.yaml
+++ b/plots/raincloud-basic/metadata/altair.yaml
@@ -25,3 +25,180 @@ review:
   weaknesses:
   - Grid lines are quite subtle (alpha 0.3) making them barely visible
   - Jittered points could be slightly larger for better visibility at full resolution
+  image_description: 'The plot displays a raincloud visualization comparing reaction
+    times across three conditions: Control (blue), Treatment A (yellow), and Treatment
+    B (green). Each condition shows the classic raincloud components: half-violin
+    density curves (clouds) extending to the right, boxplots with white median lines
+    in the center, and jittered individual data points (rain) scattered to the left.
+    The title "raincloud-basic · altair · pyplots.ai" appears at the top. The Y-axis
+    shows "Reaction Time (ms)" ranging from 200-600ms. X-axis labels identify each
+    condition clearly. A legend in the top-right corner identifies the three conditions
+    with colored circles. Treatment B notably shows a bimodal distribution in its
+    density curve. The colors are colorblind-friendly (blue, yellow, green).'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text is readable; title and axis labels are appropriately sized.
+          Minor: could be slightly larger for optimal viewing'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and data clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Points and boxplots visible, though some jittered points are small;
+          the half-violins are well-rendered
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe and provides good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; minor asymmetry with legend placement but
+          overall well-balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Reaction Time (ms)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed and clear; subtle grid lines present but could
+          be more visible
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with all three required components (half-violin,
+          boxplot, jittered points)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, continuous values on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: half-violin cloud, boxplot with median,
+          jittered rain points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately (200-600ms range)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three conditions
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "raincloud-basic · altair · pyplots.ai" but uses
+          regular dot instead of middot character
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows unimodal (Control, Treatment A) AND bimodal (Treatment
+          B) distributions demonstrating the power of rainclouds to reveal hidden
+          patterns'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction times for treatment conditions is a perfect, realistic psychology
+          experiment scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reaction times of 300-550ms are realistic for cognitive tasks
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible random data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using transform_calculate and manual positioning is acceptable but
+          not the cleanest approach
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layering, transform_density, and interactive() features.
+          However, the implementation is complex with manual positioning rather than
+          leveraging Altair's more declarative strengths
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/bokeh.yaml b/plots/raincloud-basic/metadata/bokeh.yaml
index 9b6f56fc5b..ec40c70b33 100644
--- a/plots/raincloud-basic/metadata/bokeh.yaml
+++ b/plots/raincloud-basic/metadata/bokeh.yaml
@@ -27,3 +27,177 @@ review:
   - Scatter point size (12) could be slightly larger for better visibility at 4800x2700
     resolution
   - Box plot height (0.08) is quite thin relative to the cloud and rain elements
+  image_description: 'The plot displays a horizontal raincloud visualization with
+    four treatment groups (Control, Treatment A, Treatment B, Treatment C) arranged
+    vertically. Each group shows three components: a half-violin/KDE "cloud" above
+    the centerline in distinct colors (blue for Control, yellow for Treatment A, green
+    for Treatment B, coral/orange for Treatment C), a white boxplot with black outlines
+    at the center showing median and quartiles, and jittered scatter points ("rain")
+    below the centerline. The x-axis shows "Reaction Time (ms)" ranging from ~200-600ms,
+    and the y-axis shows "Treatment Group". The title reads "raincloud-basic · bokeh
+    · pyplots.ai". A legend on the right identifies each treatment group. Treatment
+    B notably shows a bimodal distribution in its cloud shape. Grid lines are subtle
+    with dashed styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, though tick labels could be slightly
+          larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points visible with good alpha, though slightly small for the canvas
+          size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette (blue, yellow, green, coral)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Reaction Time (ms)" includes units, "Treatment Group" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but legend appears duplicated/clipped at top-right
+          corner
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with all three components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation with categories on Y, values on X
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Half-violin cloud above, boxplot in middle, jittered rain below
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate x-range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match category names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "raincloud-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows bimodal distribution (Treatment B), different spreads and means;
+          could show outliers more explicitly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction times for treatment groups is a real psychology experiment
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reaction times 200-600ms are realistic, though some values extend
+          slightly beyond typical ranges
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html correctly, but the code structure
+          is slightly verbose
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool with custom tooltips, interactive
+          legend with click_policy="hide", but could leverage more Bokeh-specific
+          features like linked brushing
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/highcharts.yaml b/plots/raincloud-basic/metadata/highcharts.yaml
index 3ca857aeaf..a7d3820d15 100644
--- a/plots/raincloud-basic/metadata/highcharts.yaml
+++ b/plots/raincloud-basic/metadata/highcharts.yaml
@@ -24,3 +24,179 @@ review:
   - Legend is overly cluttered with 9 entries (Cloud, Box Plot, and Points for each
     category) - could consolidate to show just the 4 condition names
   - Grid line styling could be more subtle to reduce visual noise
+  image_description: 'The plot displays a raincloud visualization for four experimental
+    conditions (Control, Treatment A, Treatment B, Treatment C). Each condition features
+    three elements arranged horizontally: jittered scatter points ("rain") on the
+    left in lighter shades, white boxplots with black outlines in the center showing
+    median lines and whiskers, and half-violin KDE curves ("clouds") on the right
+    in solid colors. The colorblind-safe palette uses blue for Control, yellow for
+    Treatment A, purple for Treatment B, and cyan for Treatment C. Treatment B clearly
+    shows a bimodal distribution in its cloud shape. The title "raincloud-basic ·
+    highcharts · pyplots.ai" appears at the top, with axis labels "Experimental Condition"
+    (x-axis) and "Reaction Time (ms)" (y-axis). A legend on the right categorizes
+    all series.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at large
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all components well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers well-sized with good alpha, clouds visible; minor: jitter
+          points slightly overlap within groups'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/purple/cyan palette is colorblind-safe, no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though legend area creates some empty space on
+          right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have descriptive labels with units (ms)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is overly detailed (9 entries for 4 groups), creates visual
+          noise
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with all three components (cloud, boxplot,
+          rain)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, reaction times on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All three elements present: half-violin, boxplot with median/quartiles,
+          jittered points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (250-650) shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows unimodal (Control, Treatment A, C) and bimodal (Treatment B)
+          distributions; good variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction time experiment with control and treatment conditions is
+          realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reaction times 250-650ms are plausible, though some variation seems
+          extreme
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses PolygonSeries for KDE, BoxPlotSeries, ScatterSeries; could leverage
+          more interactive features
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/letsplot.yaml b/plots/raincloud-basic/metadata/letsplot.yaml
index cf015d52e1..3e4a22a561 100644
--- a/plots/raincloud-basic/metadata/letsplot.yaml
+++ b/plots/raincloud-basic/metadata/letsplot.yaml
@@ -26,3 +26,174 @@ review:
     top-right or outside placement
   - Jittered rain points are uniformly dark gray instead of matching condition colors,
     which reduces visual cohesion
+  image_description: |-
+    The plot displays a raincloud visualization for three experimental conditions (Control, Treatment A, Treatment B) comparing reaction times in milliseconds. Each condition shows:
+    - **Half-violin (cloud)**: Positioned on the right side of each category in distinct colors - blue for Control, yellow for Treatment A, and green for Treatment B
+    - **Box plot**: White/gray boxplots centered-left showing median, quartiles, and whiskers
+    - **Jittered points (rain)**: Dark gray scatter points positioned to the left of the boxplots
+    - **Colors**: Blue (#306998), Yellow (#FFD43B), Green (#5BA85B) - colorblind-friendly palette
+    - **Title**: "raincloud-basic · letsplot · pyplots.ai" at top
+    - **Axes**: Y-axis "Reaction Time (ms)" ranging 200-700, X-axis "Experimental Condition"
+    - **Legend**: Positioned in bottom-right showing condition colors
+    - **Layout**: Clean minimal theme with subtle horizontal grid lines
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violin, boxplot, and jittered points all visible; rain points could
+          be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though Treatment B violin extends close to edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(ms)", X-axis descriptive "Experimental Condition"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend placement at bottom-right is reasonable but slightly overlaps
+          with data region
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with all three components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, continuous values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has half-violin, boxplot, jittered points; layout follows spec (cloud
+          right, rain left); bimodal distribution shown in Treatment B
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies conditions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "raincloud-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows normal, shifted, and bimodal distributions; Treatment B clearly
+          shows bimodality
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction time experiment is a real psychology research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reaction times 250-700ms are realistic for cognitive tasks
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses show_half parameter for half-violin, position_nudge, layer_tooltips
+          for interactivity, flavor_high_contrast_light(); could use more advanced
+          lets-plot features
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/matplotlib.yaml b/plots/raincloud-basic/metadata/matplotlib.yaml
index 5aa3cd6bed..24a04af91b 100644
--- a/plots/raincloud-basic/metadata/matplotlib.yaml
+++ b/plots/raincloud-basic/metadata/matplotlib.yaml
@@ -26,3 +26,180 @@ review:
     legend or making it show components (cloud, boxplot, rain) instead
   - Could leverage scipy.stats.gaussian_kde for cleaner KDE implementation rather
     than manual calculation
+  image_description: 'The plot displays a horizontal raincloud visualization for three
+    experimental conditions (Control, Treatment A, Treatment B) showing reaction times
+    in milliseconds. Each condition has three elements arranged vertically: a half-violin
+    density curve (cloud) in the upper region colored blue (#306998) for Control,
+    yellow (#FFD43B) for Treatment A, and green (#4CAF50) for Treatment B; a white
+    boxplot with dark gray borders in the middle showing median, quartiles, and whiskers;
+    and jittered scatter points (rain) below each boxplot with the same colors as
+    their respective clouds. The x-axis shows "Reaction Time (ms)" ranging from ~175
+    to 550ms, and the y-axis shows "Experimental Condition" with the three category
+    labels. The title reads "raincloud-basic · matplotlib · pyplots.ai" in the correct
+    format. A legend is positioned outside the plot area on the upper right. The bimodal
+    distribution of Treatment A is clearly visible in its cloud, and Treatment B shows
+    outliers extending toward 500ms.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16/18pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized well (s=110), alpha=0.6 appropriate for ~80 points
+          per group; slight improvement possible for denser regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green palette is colorblind-safe and provides good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, legend positioned outside but well-placed;
+          minor spacing could be tighter
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "Reaction Time (ms)", Y-axis is descriptive "Experimental
+          Condition"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but legend duplicates y-axis labels which
+          is redundant
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with half-violin, boxplot, and jittered points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Cloud on top, boxplot in middle, rain below - follows spec layout
+          perfectly
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers around 500ms
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match category names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "raincloud-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows unimodal (Control), bimodal (Treatment A), and distribution
+          with outliers (Treatment B)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction times for experimental conditions is a classic use case
+          in psychology research
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reaction times in 200-500ms range are realistic for human responses
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, and Patch (all used)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implements KDE manually instead of using scipy.stats.gaussian_kde
+          or matplotlib's built-in violin capabilities
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/plotly.yaml b/plots/raincloud-basic/metadata/plotly.yaml
index 0dfe8051b5..6beda35c56 100644
--- a/plots/raincloud-basic/metadata/plotly.yaml
+++ b/plots/raincloud-basic/metadata/plotly.yaml
@@ -27,3 +27,186 @@ review:
     items show condition-specific names like Control (Cloud) which is confusing
   - Could leverage more Plotly-specific interactive features
   - Implementation header has pre-filled quality score before actual review
+  image_description: 'The plot displays a raincloud visualization for four experimental
+    conditions (Control, Treatment A, Treatment B, Treatment C) showing reaction time
+    data in milliseconds. Each condition has three components: (1) a half-violin "cloud"
+    shown on the right side in distinct colors (blue for Control, light blue for Treatment
+    A, yellow for Treatment B, gray for Treatment C), (2) a white box plot with black
+    outlines in the center showing median, quartiles, and whiskers, and (3) jittered
+    data points ("rain") on the left side of each condition. The title "raincloud-basic
+    · plotly · pyplots.ai" is centered at the top. Y-axis shows "Reaction Time (ms)"
+    ranging from 200-700ms, X-axis shows "Experimental Condition". A legend labeled
+    "Raincloud Components" in the upper right explains Cloud (Distribution), Stats
+    (Box Plot), and Rain (Data Points). Treatment B clearly shows a bimodal distribution
+    in its violin shape. The plot has a clean white background with subtle horizontal
+    gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 20pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers well-sized with proper opacity, violin density clearly visible.
+          Minor: some rain points at edges slightly overlap'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, light blue, yellow, gray palette is colorblind-safe with good
+          differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend positioned neatly
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Reaction Time (ms)" and "Experimental Condition" are descriptive
+          with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Legend labels are incorrect: shows "Cloud (Distribution)" but actual
+          legend items show condition names like "Control (Cloud)", creating inconsistency'
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with half-violin, box plot, and jittered points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Has all three components (cloud, box, rain). The spec notes "cloud
+          on top, rain below" for horizontal orientation and "cloud on right side,
+          boxplot centered, rain points on left/below" for vertical orientation. The
+          implementation follows the vertical layout (cloud right, rain left), but
+          rain is beside rather than "below" the boxplot
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range 200-700ms shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies the three components
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "raincloud-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: Control (normal distribution), Treatment A (narrower,
+          shifted), Treatment B (bimodal clearly visible in violin!), Treatment C
+          (wider distribution with outliers)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Measuring reaction times in psychology experiments is a perfect
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 250-700ms are realistic for human reaction times
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Header says "Quality: 91/100", which was pre-set; the implementation
+          header should not contain a quality score until the review is done'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Violin and go.Box with good hover templates and custom styling.
+          However, doesn't leverage Plotly's interactivity features like buttons,
+          sliders, or annotations. HTML output is generated but could showcase more
+          interactive capabilities
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/plotnine.yaml b/plots/raincloud-basic/metadata/plotnine.yaml
index 76aeed0f40..bdcf0eb051 100644
--- a/plots/raincloud-basic/metadata/plotnine.yaml
+++ b/plots/raincloud-basic/metadata/plotnine.yaml
@@ -24,3 +24,177 @@ review:
   - No legend showing color-to-condition mapping (relies solely on y-axis labels)
   - Uses scipy for KDE instead of native plotnine density geoms (geom_violin with
     half-violin trimming would be more idiomatic)
+  image_description: |-
+    The plot shows a horizontal raincloud visualization with three experimental conditions (Control, Treatment A, Treatment B) displayed vertically. Each condition has three components arranged from top to bottom:
+    - **Cloud (half-violin/KDE)**: Blue for Control, Yellow for Treatment A, Green for Treatment B - positioned above the boxplot showing distribution density
+    - **Boxplot**: White-filled narrow boxplots centered on each row showing median, quartiles, and whiskers
+    - **Rain (jittered points)**: Scattered points below the boxplot in matching colors with moderate transparency
+
+    The title "raincloud-basic · plotnine · pyplots.ai" appears at the top. X-axis shows "Reaction Time (ms)" ranging from ~300-600, Y-axis shows "Experimental Condition" with the three group labels. Treatment B clearly shows a bimodal distribution in both the cloud and rain points. The visual metaphor of "rain falling from clouds" is achieved correctly.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Rain points are visible with good alpha; could be slightly larger
+          for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green palette is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; minor extra whitespace on left side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Reaction Time (ms)" and "Experimental
+          Condition"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (colors are not labeled in a legend, though category
+          labels on y-axis help)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with all three components (half-violin, boxplot,
+          jittered points)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on Y-axis, values on X-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: cloud on top, boxplot centered, rain
+          below'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Category labels on Y-axis serve as legend; colors consistently matched
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "raincloud-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows unimodal (Control, Treatment A) AND bimodal (Treatment
+          B) distributions'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction time experiment is a real, comprehensible psychological
+          research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reaction times in 300-600ms range are realistic for cognitive tasks
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and np.random.seed(123) for deterministic
+          output
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports used, but scipy is an additional dependency beyond core
+          plotnine
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: header shows Quality: 86/100 which is outdated'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine grammar (ggplot, geom_ribbon, geom_boxplot,
+          geom_point, coord_flip, theme system). Creative use of geom_ribbon for half-violin.
+          However, relies on scipy for KDE calculation rather than native plotnine
+          density estimation.
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/pygal.yaml b/plots/raincloud-basic/metadata/pygal.yaml
index a7d092ec7f..48ecd89d53 100644
--- a/plots/raincloud-basic/metadata/pygal.yaml
+++ b/plots/raincloud-basic/metadata/pygal.yaml
@@ -22,3 +22,185 @@ review:
   weaknesses:
   - Box plot elements could be slightly thicker/more visible relative to the half-violin
   - No interactive tooltips leveraging pygal SVG interactivity capabilities
+  image_description: 'The plot displays a raincloud visualization for three treatment
+    groups (Control, Treatment A, Treatment B) showing reaction time data in milliseconds.
+    Each group uses a distinct color: blue (#306998) for Control, yellow (#FFD43B)
+    for Treatment A, and green (#4CAF50) for Treatment B. For each group, from left
+    to right: jittered data points ("rain") appear on the left side, followed by a
+    box plot (showing median, quartiles, and whiskers) in the center, and a half-violin
+    density plot ("cloud") on the right side. The title "raincloud-basic · pygal ·
+    pyplots.ai" appears at the top. The Y-axis shows "Reaction Time (ms)" ranging
+    from 100-700, and the X-axis shows "Treatment Group" with the three category labels.
+    The layout has subtle grid lines on a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable at full size; title, axis labels, and tick marks
+          are clearly visible with appropriate font sizes for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Data points are clearly visible with good sizing; the dots_size=32
+          works well for the data density. Minor: box plot elements could be slightly
+          more prominent'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette using blue, yellow, and green which are distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space; plot fills appropriate area. Minor: some
+          extra whitespace on right edge'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Reaction Time (ms)" and "Treatment
+          Group"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend shown (acceptable since
+          colors map to x-axis categories, but could be cleaner)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct raincloud plot with all three elements: half-violin (cloud),
+          box plot, and jittered points (rain)'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values (reaction times) on Y-axis correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has half-violin, box plot with median/quartiles/whiskers, and jittered
+          points. Minor: the spec notes "cloud on top, rain below" for horizontal
+          or "cloud on right, rain on left" for vertical - implementation follows
+          vertical correctly'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (100-750) shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit legend, relies on x-axis labels (functional but not ideal)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "raincloud-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct distributions with different means and spreads;
+          includes outliers. Data demonstrates the value of raincloud plots well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction times for control vs treatment groups is a classic, realistic
+          psychology experiment scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Reaction times in 180-700ms range are perfectly realistic for human
+          response time studies
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save. While the code is more
+          complex due to manual raincloud construction, no unnecessary functions or
+          classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and secondary seeds for jitter
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and pygal.style.Style used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as both plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart with custom styling to build a complex visualization.
+          While the manual construction is impressive, it doesn't leverage pygal-specific
+          features like built-in interactivity or tooltips
+  verdict: APPROVED
diff --git a/plots/raincloud-basic/metadata/seaborn.yaml b/plots/raincloud-basic/metadata/seaborn.yaml
index c1fcdacec9..eaede7fcdb 100644
--- a/plots/raincloud-basic/metadata/seaborn.yaml
+++ b/plots/raincloud-basic/metadata/seaborn.yaml
@@ -24,3 +24,168 @@ review:
   - Violin clipping approach uses direct vertex manipulation rather than seaborn-native
     methods
   - Legend overlaps slightly with data points in the upper left corner
+  image_description: 'The plot displays a raincloud visualization comparing reaction
+    times across three experimental conditions (Control, Treatment A, Treatment B).
+    Each condition is represented with a colorblind-accessible color scheme: blue
+    (#306998) for Control, yellow/gold (#E6A800) for Treatment A, and green (#4DAF4A)
+    for Treatment B. The raincloud structure is correctly implemented with half-violins
+    (clouds) on the right side of each category, white boxplots with black borders
+    in the middle showing median and quartiles, and jittered strip points (rain) on
+    the left side. The title "raincloud-basic · seaborn · pyplots.ai" is displayed
+    at the top. The Y-axis shows "Reaction Time (ms)" ranging from about 300-575ms,
+    and the X-axis shows "Condition". The Treatment B distribution clearly shows bimodality
+    in its half-violin, demonstrating the advantage of rainclouds over traditional
+    boxplots. A legend is positioned in the upper left corner. Grid lines are subtle
+    and dashed on the y-axis only.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points visible with appropriate size and alpha, slight overlap in
+          dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Reaction Time (ms)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but legend placement in upper left overlaps
+          with the data area slightly
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct raincloud plot with all three elements
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, continuous values on Y
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Half-violin, boxplot, jittered points all present; layout follows
+          cloud-right, rain-left convention
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match conditions
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Format is correct but uses middot (·) character consistently
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows normal distribution (Control), tighter distribution (Treatment
+          A), and bimodal distribution (Treatment B)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Reaction time experiment is a plausible psychology research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 280-575ms are realistic for human reaction times
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses legacy approach for violin manipulation
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/altair.yaml b/plots/residual-basic/metadata/altair.yaml
index 544ded47fd..23b335045a 100644
--- a/plots/residual-basic/metadata/altair.yaml
+++ b/plots/residual-basic/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
   - Axis labels lack units (though for residuals this is often acceptable)
   - Data scenario is generic rather than tied to a specific real-world regression
     context
+  image_description: The plot displays a residual plot with blue circular markers
+    (#306998) showing residuals against fitted values. The x-axis is labeled "Fitted
+    Values" ranging from 5 to 55, and the y-axis is labeled "Residuals" with a symmetric
+    range from -6.0 to 6.0. A dashed black horizontal reference line at y=0 clearly
+    indicates the zero-residual baseline. A yellow/gold LOESS smoothed trend line
+    (bandwidth 0.4) weaves through the data showing the local trend of residuals.
+    The title "residual-basic · altair · pyplots.ai" is displayed at the top center.
+    The data shows 100 points with visible outliers around (16, 4.5), (30, 3.8), and
+    (41, -4.2), demonstrating heteroscedasticity with slightly increasing variance
+    as fitted values increase. The grid is subtle with low opacity, and the overall
+    layout is clean with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (150) with good opacity (0.6), appropriate
+          for 100 data points; minor deduction as slightly larger would be ideal
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, plot well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Fitted Values", "Residuals") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (0.3 opacity), no legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot (scatter with reference line)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on X, residuals on Y - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal reference line at y=0, transparency
+          (0.6), LOESS trend line, proper axis labels, symmetric y-axis'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric y-axis (-6 to 6), appropriate x-axis (5-55)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exactly matches `{spec-id} · {library} · pyplots.ai` format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers, heteroscedasticity pattern, random scatter; could
+          show more pronounced funnel shape
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Plausible regression residuals but generic (not tied to real-world
+          scenario)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Residual values realistic for standardized regression context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative layering (alt.layer), transform_loess
+          for LOESS smoothing, tooltips, and proper encoding with scale configurations
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/bokeh.yaml b/plots/residual-basic/metadata/bokeh.yaml
index fa7e9c17b9..09b7fc3509 100644
--- a/plots/residual-basic/metadata/bokeh.yaml
+++ b/plots/residual-basic/metadata/bokeh.yaml
@@ -26,3 +26,175 @@ review:
     not implemented (optional but would enhance diagnostic capability)
   - Feature coverage could show more pronounced heteroscedasticity pattern for better
     demonstration of this diagnostic feature
+  image_description: 'The plot displays a residual scatter plot with 150 blue circular
+    markers (color #306998) against a white background. The title "residual-basic
+    · bokeh · pyplots.ai" appears at the top in black text. The x-axis is labeled
+    "Fitted Values ($K)" ranging from approximately 150 to 500, and the y-axis is
+    labeled "Residuals ($K)" ranging symmetrically from approximately -100 to +100.
+    A horizontal dashed reference line at y=0 helps assess systematic bias. The markers
+    have moderate transparency (alpha=0.6) revealing overlapping points. The plot
+    shows random scatter around zero with visible outliers at approximately (+95,
+    -80, +85) and a subtle heteroscedasticity pattern. Subtle gray grid lines are
+    present. The Bokeh toolbar icons are visible in the top-right corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 72pt, axis labels at 48pt, tick labels at 36pt - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at 40 with alpha 0.6 appropriate for 150 points, revealing
+          density and outliers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, symmetric y-axis range
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Fitted Values ($K)" and "Residuals
+          ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3), but no legend needed for single-series
+          plot - deducting 0 pts since no legend is required
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot showing residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on x-axis, residuals on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Has reference line at y=0, alpha transparency, symmetric y-axis,
+          but missing LOWESS/LOESS trend line mentioned as "consider" in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric y-axis range around zero, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "residual-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows random scatter, outliers, slight heteroscedasticity pattern
+          - good variety but could show more extreme heteroscedasticity
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: House price regression model is a realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $K for housing prices (150-500K fitted, ±100K residuals)
+          are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: ColumnDataSource, HoverTool with formatted tooltips, Span for reference
+          line, HTML export for interactivity
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/highcharts.yaml b/plots/residual-basic/metadata/highcharts.yaml
index 8ffccf22c1..cfc1bac44c 100644
--- a/plots/residual-basic/metadata/highcharts.yaml
+++ b/plots/residual-basic/metadata/highcharts.yaml
@@ -25,3 +25,179 @@ review:
   - Margins are generous but slightly excessive, reducing the effective plot area
   - Trend line label says Trend (Smoothed) but spec suggests LOWESS/LOESS terminology
     would be more precise
+  image_description: The plot displays a residual scatter plot with a blue color scheme
+    (#306998) for data points. The title "residual-basic · highcharts · pyplots.ai"
+    appears at the top with a subtitle "Residuals from Linear Regression Model." The
+    X-axis is labeled "Fitted Values" ranging from ~10 to ~98, and the Y-axis is labeled
+    "Residuals" with a symmetric range from -40 to +40. A horizontal blue reference
+    line at y=0 is present with a "Zero Reference" label on the right. The scatter
+    points show moderate transparency with blue outlines. A yellow/gold smoothed trend
+    line (labeled "Trend (Smoothed)" in the legend) runs through the data showing
+    slight variation. The legend is positioned in the top-right corner. Several outliers
+    are visible (around +28, +33, and -25). The layout uses the full canvas well with
+    appropriate margins.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements, everything is well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized (radius 14) with good alpha (0.6)
+          for 120 points. Slightly larger than optimal but still effective
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) palette is colorblind-safe, no
+          red-green conflict
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with appropriate margins, though margins are quite
+          large leaving some unused space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Fitted Values" and "Residuals" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha via color), legend is well-placed but could
+          be more compact
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot for residual visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on X, residuals on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has horizontal reference line at y=0, moderate transparency (~0.6),
+          smoothed trend line (moving average), proper axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric y-axis range around zero as recommended in spec
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Residuals" and "Trend (Smoothed)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "residual-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows random scatter around zero, clear outliers at +28/+33/-25,
+          and mild heteroscedasticity. Could show slightly more pronounced patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible regression residuals scenario, but context is generic (not
+          tied to specific domain)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for regression residuals with fitted values 10-90
+          and residuals ±40
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts components, selenium, etc.)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as both plot.png and plot.html (correct for interactive library)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Highcharts features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotLines for zero reference, spline series for trend, proper
+          chart configuration. Could leverage more interactive tooltips or data labels
+          for outlier highlighting
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/letsplot.yaml b/plots/residual-basic/metadata/letsplot.yaml
index 2c360a49cc..df7c15c3ea 100644
--- a/plots/residual-basic/metadata/letsplot.yaml
+++ b/plots/residual-basic/metadata/letsplot.yaml
@@ -23,3 +23,175 @@ review:
   weaknesses:
   - Grid lines could be more subtle (lower alpha value)
   - Data context is generic rather than demonstrating a specific domain application
+  image_description: The plot displays a residual plot with fitted values (15-100)
+    on the x-axis and residuals (approximately -14 to 12) on the y-axis. Blue scatter
+    points (with ~0.6 alpha transparency) show the distribution of residuals around
+    zero. A red dashed horizontal reference line at y=0 provides a baseline for assessing
+    bias. A yellow/gold LOESS smoothed trend line shows the local pattern in residuals,
+    staying close to zero across the fitted value range. The plot has a clean minimal
+    theme with subtle gray gridlines, clear axis labels ("Fitted Values" and "Residuals
+    (Observed − Predicted)"), and the title "residual-basic · letsplot · pyplots.ai"
+    at the top. Several outliers are visible (around -13 and +11), demonstrating realistic
+    regression diagnostics.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized for the 100-point dataset with good alpha,
+          though slightly small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points, red reference line, yellow trend line - all distinguishable
+          and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Fitted Values" and "Residuals (Observed − Predicted)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed (single series), but grid could be more subtle (alpha
+          higher than ideal)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on X, residuals on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes reference line at y=0, LOESS trend line, alpha transparency
+          on points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis approximately symmetric around zero (-14 to +12)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; single data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "residual-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows heteroscedasticity pattern and outliers as specified, good
+          diagnostic interest
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible regression residuals, though context is generic rather
+          than domain-specific
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for regression diagnostics
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses wildcard import with noqa comments (acceptable for lets-plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_smooth with LOESS, theme customization;
+          could leverage more lets-plot specific features
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/matplotlib.yaml b/plots/residual-basic/metadata/matplotlib.yaml
index 715eb29d68..dede324df6 100644
--- a/plots/residual-basic/metadata/matplotlib.yaml
+++ b/plots/residual-basic/metadata/matplotlib.yaml
@@ -28,3 +28,179 @@ review:
     is self-explanatory
   - Moving average implementation is manual rather than using scipy or statsmodels
     LOWESS for more accurate smoothing
+  image_description: 'The plot displays a residual plot with "Fitted Values" on the
+    x-axis (ranging from approximately 10 to 90) and "Residuals" on the y-axis (ranging
+    from approximately -25 to +25, symmetric around zero). Blue circular markers with
+    white edges and moderate transparency (alpha ~0.6) represent individual residual
+    points. A horizontal black reference line at y=0 clearly indicates where residuals
+    equal zero. A yellow/gold smoothed trend line (moving average) runs through the
+    data, hovering near zero and showing slight deviations. The y-axis is symmetric
+    around zero as recommended. The legend in the upper right corner shows three items:
+    "Zero Reference", "Residuals", and "Trend (Smoothed)". A subtle dashed grid aids
+    readability. The title follows the required format: "residual-basic · matplotlib
+    · pyplots.ai". The data shows realistic heteroscedasticity with variance increasing
+    slightly at higher fitted values, and several clear outliers are visible (around
+    -22 and +25).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at s=150 with alpha=0.6 are well-sized for 150 points; slightly
+          on the larger side but appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Fitted Values", "Residuals") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend is well-placed but includes "Zero
+          Reference" which adds clutter
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot (scatter with reference line and trend)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on X, residuals on Y as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: y=0 reference line, transparency, smoothed
+          trend line, proper labels, symmetric y-axis'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with symmetric y-axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match plot elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "residual-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows random scatter, heteroscedasticity pattern, clear
+          outliers - all key aspects of residual analysis'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible regression residuals scenario; fitted values 10-90 and
+          residuals ±25 are reasonable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 150 points is appropriate, values are sensible for regression diagnostics
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API with Axes methods
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" which is correct, but the file is not in the
+          expected location
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of axhline, scatter with edge colors, zorder for layering;
+          could leverage more matplotlib features like axhspan for confidence bands
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/plotly.yaml b/plots/residual-basic/metadata/plotly.yaml
index 70dbe45089..0bbc1b022f 100644
--- a/plots/residual-basic/metadata/plotly.yaml
+++ b/plots/residual-basic/metadata/plotly.yaml
@@ -26,3 +26,181 @@ review:
     into specific regions or spike lines for precise value reading
   - Marker size (14) slightly large for 150 points - could be reduced to 10-12 for
     cleaner appearance
+  image_description: 'The plot displays a residual analysis visualization with the
+    title "residual-basic · plotly · pyplots.ai" centered at the top. The X-axis shows
+    "Fitted Values" ranging from approximately 10 to 100, and the Y-axis shows "Residuals"
+    ranging from approximately -12 to +12. Blue circular markers (#306998) with moderate
+    transparency (alpha ~0.6) represent 150 individual residual points. A yellow dashed
+    horizontal line at y=0 serves as the zero reference line. A red solid trend line
+    (smoothed moving average) oscillates near zero, revealing subtle patterns in the
+    residuals. The plot demonstrates heteroscedasticity - variance clearly increases
+    as fitted values increase (tighter spread on the left, wider spread on the right).
+    A well-formatted legend in the top-right corner identifies all three elements:
+    Residuals, Zero Reference, and Trend (Smoothed). The background is white with
+    subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size 14 with 0.6 opacity work well for 150 points, though
+          slightly on the larger side
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are distinguishable; no red-green confusion
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins, plot fills canvas appropriately, legend positioned
+          well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Fitted Values" and "Residuals" but no units specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.3 is appropriate; legend is well-placed but slightly
+          overlaps data region
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot showing residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Fitted values, Y=Residuals correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has horizontal reference line at y=0, smoothed trend line (LOWESS
+          approximation via moving average), moderate transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis symmetric around zero
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "residual-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows heteroscedasticity pattern, random scatter around zero, subtle
+          non-linear pattern via trend line. Missing explicit outliers that stand
+          out dramatically
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Data simulates realistic regression diagnostics with heteroscedasticity,
+          a common real-world pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for generic regression residuals; could be
+          more contextual (e.g., specific units for a real scenario)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter and custom hover templates which are Plotly features,
+          but doesn't leverage more advanced interactive features like range sliders,
+          buttons, or annotations that would enhance diagnostic exploration
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/plotnine.yaml b/plots/residual-basic/metadata/plotnine.yaml
index fb87fbff82..c70d5e17d4 100644
--- a/plots/residual-basic/metadata/plotnine.yaml
+++ b/plots/residual-basic/metadata/plotnine.yaml
@@ -24,3 +24,175 @@ review:
   - Axis labels lack units (though for generic residuals, units may not be applicable)
   - Grid styling could use slightly higher alpha for better visibility (currently
     0.3)
+  image_description: The plot displays a residual plot with approximately 150 blue
+    scatter points (#306998) plotted against fitted values. A solid black horizontal
+    reference line at y=0 helps assess systematic bias. A yellow/gold LOWESS smoothed
+    trend line traverses the plot, showing the residuals are mostly centered around
+    zero with slight variation. The x-axis is labeled "Fitted Values" (ranging from
+    ~10 to 90), and the y-axis is labeled "Residuals" (symmetric range from -30 to
+    +30). The title "residual-basic · plotnine · pyplots.ai" appears in bold at the
+    top. The plot uses a minimal theme with subtle dashed gray grid lines. Points
+    have moderate transparency (alpha=0.6) allowing density visualization. Several
+    outliers are visible (around +25 at x≈85, -22 at x≈55), demonstrating a
+    heteroscedasticity pattern where variance increases with fitted values.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points sized well (size=4) with good alpha (0.6) for 150 points,
+          slightly on the smaller side but clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points with yellow trend line, good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Fitted Values" and "Residuals" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid alpha set to 0.3 with dashed style (good), but no legend needed/present
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on X, residuals on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal reference line at y=0, transparency,
+          LOWESS trend line, proper axis labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Symmetric y-axis around zero as spec recommends
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "residual-basic · plotnine · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows heteroscedasticity pattern, outliers, and mostly random scatter
+          - demonstrates key diagnostic aspects well
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible regression residuals scenario with realistic patterns,
+          though generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible fitted values (10-90) and residual range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine's grammar of graphics with geom_hline, geom_smooth
+          (LOWESS), theme_minimal, and proper theming. Could have used more advanced
+          features like faceting or custom scales.
+  verdict: APPROVED
diff --git a/plots/residual-basic/metadata/pygal.yaml b/plots/residual-basic/metadata/pygal.yaml
index 56f256e210..6331175df1 100644
--- a/plots/residual-basic/metadata/pygal.yaml
+++ b/plots/residual-basic/metadata/pygal.yaml
@@ -25,3 +25,177 @@ review:
     placement would be cleaner
   - Axis labels lack units (e.g., could use more descriptive context)
   - Realistic context could be stronger with more specific scenario labeling
+  image_description: 'The plot displays a residual scatter plot on a white background
+    with blue circular markers (Python Blue #306998) representing residuals plotted
+    against fitted values. The fitted values range from approximately 10 to 50 on
+    the x-axis, while the residuals span from -8 to +8 on the y-axis with symmetric
+    scaling. A dashed red/salmon horizontal reference line at y=0 helps assess systematic
+    bias. The title "residual-basic · pygal · pyplots.ai" appears at the top center
+    in gray text. Axis labels clearly show "Fitted Values" and "Residuals". The legend
+    is positioned in the top-left corner showing "Residuals" (blue square) and "Zero
+    Reference" (red square). Several outliers are visible at approximately (+15, +8.5),
+    (+30, -9.2), and (+42, +7.8). The majority of points cluster randomly around the
+    zero line, demonstrating good model behavior. Subtle horizontal dotted grid lines
+    aid readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; font sizes are appropriate
+          for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend is well-separated from data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are visible with good size (dots_size=14); transparency at
+          0.6 reveals density well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue for data points and red for reference line provide good contrast;
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; plot area is well-proportioned though legend
+          could be at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Fitted Values" and "Residuals" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle horizontal grid lines (y_guides only); legend is readable
+          and well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter plot for residual visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on X-axis, residuals on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes horizontal reference line at y=0, transparency, symmetric
+          y-axis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Residuals" and "Zero Reference"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "residual-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows random scatter around zero, outliers, and slight heteroscedasticity;
+          demonstrates residual patterns well
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Plausible regression diagnostics scenario; generic but appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible fitted values (10-50) and residual magnitudes
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and pygal imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart, custom Style, stroke_style for dashed line; good but
+          not exceptional pygal usage
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/altair.yaml b/plots/residual-plot/metadata/altair.yaml
index 088af3dbf0..aa307bb832 100644
--- a/plots/residual-plot/metadata/altair.yaml
+++ b/plots/residual-plot/metadata/altair.yaml
@@ -27,3 +27,183 @@ review:
   - Points use outlined circles which can be slightly less visible than filled circles
     at smaller sizes
   - The grid lines behind the ±2σ dashed lines create slight visual noise
+  image_description: The plot displays a residual plot for housing price predictions
+    with "Fitted Values ($)" on the x-axis (ranging from 0 to ~520,000) and "Residuals
+    ($)" on the y-axis (ranging from -50,000 to +40,000). Blue outlined circles represent
+    "Normal" data points, while yellow outlined circles denote "Outlier (>2σ)" points.
+    A black dashed horizontal reference line at y=0 indicates perfect predictions.
+    Two gray dashed horizontal lines mark the ±2 standard deviation boundaries (~±31,000).
+    A prominent red LOESS smoothing line reveals a characteristic "U-shaped" pattern
+    in the residuals, starting high, dipping negative in the mid-range, and rising
+    again - clearly demonstrating the non-linear relationship that the linear model
+    failed to capture. The legend is positioned on the right side with "Point Type"
+    header. The title correctly follows the format "residual-plot · altair · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels and ticks are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Points are well-sized (size=120) with good opacity (0.7), appropriate
+          for 150 data points. Minor deduction: outlined points rather than filled
+          could be slightly harder to see'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and provide
+          excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor whitespace on the left side of the
+          plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Fitted Values ($)" and "Residuals
+          ($)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (gridOpacity=0.3), but legend placement could be improved
+          - it's somewhat isolated on the right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot showing residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Y-axis shows residuals, X-axis shows fitted values as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal reference line at y=0, alpha
+          transparency, LOESS smoothing line, ±2σ bands, outlier coloring'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, axes show complete range with nice scaling
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Normal" and "Outlier (>2σ)" categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "residual-plot · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows outliers, non-linear pattern via LOESS, heteroscedasticity
+          visible. Minor: could show more extreme outliers'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Housing price prediction is an excellent, comprehensible real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Values are realistic for housing prices ($200k-$500k fitted, residuals
+          in $10k-$40k range). Slight deduction: the quadratic component creates a
+          very pronounced pattern'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot layers → combine → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Saves both plot.png and plot.html (correct); minor note: ideally
+          it should output only what''s needed'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative layering system, transform_loess
+          for smoothing, tooltips for interactivity. Could better leverage Altair's
+          selection/interaction capabilities
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/bokeh.yaml b/plots/residual-plot/metadata/bokeh.yaml
index d4d696cd4c..d613194973 100644
--- a/plots/residual-plot/metadata/bokeh.yaml
+++ b/plots/residual-plot/metadata/bokeh.yaml
@@ -23,3 +23,173 @@ review:
     edges, making them hard to read
   - Legend text size could be slightly larger for better visibility at the 4800x2700
     resolution
+  image_description: "The plot displays a residual plot for regression diagnostics\
+    \ with a light gray (#fafafa) background. The title \"residual-plot · bokeh ·\
+    \ pyplots.ai\" appears at the top left in dark gray text. The x-axis shows \"\
+    Fitted Values (Predicted Price in $1000s)\" ranging from ~125 to ~475, and the\
+    \ y-axis shows \"Residuals (Observed - Predicted)\" ranging from approximately\
+    \ -100 to +100. \n\nBlue circular markers (Python blue #306998) represent normal\
+    \ residuals scattered around the zero line. Yellow/gold markers (#FFD43B) with\
+    \ blue borders highlight outliers beyond ±2 standard deviations - visible at the\
+    \ extreme top and bottom of the plot. A solid blue horizontal reference line at\
+    \ y=0 indicates perfect predictions. A light gray shaded band spans the ±2 SD\
+    \ region (~-50 to ~+50), bounded by dashed gray lines. Small italic labels \"\
+    +2 SD\" and \"-2 SD\" appear near the left edge of the band boundaries. The legend\
+    \ in the top-left corner shows \"Residuals\" and \"Outliers (>2 SD)\". The grid\
+    \ is subtle with dashed lines and low alpha."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all readable; SD labels slightly
+          small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker sizes appropriate for 150 points, good alpha transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight excess whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("$1000s")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), but legend labels could be larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot (scatter with reference line)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on x-axis, residuals on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has zero line, ±2 SD bands, outlier highlighting, alpha transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies residuals and outliers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "residual-plot · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows random scatter around zero, outliers, variance pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Housing price prediction is plausible; residual distribution realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Price range $150k-$450k and residuals ±70 are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Band, Span, Label, ColumnDataSource, and interactive
+          tools
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/highcharts.yaml b/plots/residual-plot/metadata/highcharts.yaml
index 9d214acf9b..3d1187b919 100644
--- a/plots/residual-plot/metadata/highcharts.yaml
+++ b/plots/residual-plot/metadata/highcharts.yaml
@@ -23,3 +23,167 @@ review:
     markers, and interactive HTML output
   weaknesses:
   - Grid lines could be slightly more visible for better readability
+  image_description: The plot displays a residual plot with "residual-plot · highcharts
+    · pyplots.ai" as the title. The x-axis shows "Fitted Values ($)" ranging from
+    about 150k to 550k, and the y-axis shows "Residuals ($)" ranging from about -60k
+    to 120k. Blue circular markers represent regular residuals scattered around a
+    blue horizontal zero line labeled "Zero Line (Perfect Fit)". Red diamond markers
+    indicate outliers (>2σ). Orange dashed horizontal lines at approximately ±58k
+    mark the +2σ and -2σ thresholds. A legend in the top right corner shows "Residuals"
+    (blue circle) and "Outliers (>2σ)" (red diamond). The data shows residuals randomly
+    scattered around zero with no obvious pattern, indicating a good model fit. There
+    are 5 outliers visible - 3 above the +2σ threshold and 2 below the -2σ threshold.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text readable, appropriate sizing for canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: markers well-sized for 120 data points, with alpha transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: colorblind-safe blue/red-orange scheme
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: good canvas utilization with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'descriptive with units: "Fitted Values ($)", "Residuals ($)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: grid lines somewhat faint
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct scatter plot for residuals
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: fitted values on x-axis, residuals on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: zero line, ±2σ bands, outlier coloring, alpha transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly labels series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows residuals around zero with outliers both sides
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: housing price prediction scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: plausible values for housing prices
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: linear script, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: plotLines, multiple series, interactive HTML
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/letsplot.yaml b/plots/residual-plot/metadata/letsplot.yaml
index 35daebf4a5..01184ea319 100644
--- a/plots/residual-plot/metadata/letsplot.yaml
+++ b/plots/residual-plot/metadata/letsplot.yaml
@@ -26,3 +26,184 @@ review:
   - Grid alpha at 0.5 is slightly too prominent; recommended 0.2-0.4 for subtler appearance
   - Wildcard import pattern, while necessary for lets-plot, could be more explicit
   - Could leverage lets-plot interactive tooltips in HTML output for enhanced exploration
+  image_description: 'The plot displays a residual plot with 150 data points. The
+    main visualization uses coral/red colored scatter points for normal residuals
+    and blue colored points for outliers (beyond ±2 standard deviations). A solid
+    blue horizontal reference line at y=0 represents perfect predictions. Two red
+    dashed horizontal lines mark the ±2σ outlier boundaries at approximately ±26.5.
+    A yellow LOWESS smoothing curve clearly shows a U-shaped pattern, correctly revealing
+    the non-linear relationship in the residuals (due to the deliberate quadratic
+    term in the data). The x-axis shows ''Fitted Values'' ranging from ~40 to ~480,
+    and the y-axis shows ''Residuals (Observed - Predicted)'' ranging from ~-25 to
+    ~30. The legend on the right side distinguishes ''Outlier (>2σ)'' and ''Normal''
+    point types. The title correctly follows the format: ''residual-plot · letsplot
+    · pyplots.ai''. The overall layout is clean with a minimal theme and subtle gray
+    gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold at 24pt, axis titles at 20pt, tick labels at 16pt,
+          legend text at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=5) with appropriate alpha (0.7) for 150
+          data points; slight deduction as some overlapping points in dense areas
+          could benefit from slightly smaller size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) vs coral/red (#DC2626) provides good contrast and
+          is colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: ''Fitted Values'' and ''Residuals (Observed
+          - Predicted)'''
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid uses alpha=0.5 which is slightly too prominent (recommended
+          0.2-0.4), and the panel_grid_major setting makes gridlines more visible
+          than ideal
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot type with residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on x-axis, residuals on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Includes: horizontal y=0 reference line, LOWESS smoothing, ±2σ outlier
+          bands, outlier coloring'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies 'Normal' and 'Outlier (>2σ)' point types
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: ''residual-plot · letsplot · pyplots.ai'''
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows heteroscedasticity (increasing variance with X), non-linear
+          pattern (U-shaped LOWESS), outliers at both extremes, and residuals scattered
+          around zero
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible regression scenario with deliberate model misspecification;
+          slightly generic as it doesn't name a specific domain
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for a regression context
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses wildcard import with noqa comment (acceptable for lets-plot
+          but not ideal)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_hline, geom_smooth(method='loess'),
+          theme_minimal(), and proper scaling. Could have used lets-plot's interactive
+          tooltip features for HTML output.
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/matplotlib.yaml b/plots/residual-plot/metadata/matplotlib.yaml
index ab8f81e367..8c13b5535a 100644
--- a/plots/residual-plot/metadata/matplotlib.yaml
+++ b/plots/residual-plot/metadata/matplotlib.yaml
@@ -27,3 +27,182 @@ review:
     similar notation)
   - Could demonstrate heteroscedasticity more clearly with variance that changes across
     fitted values
+  image_description: The plot displays a residual plot with fitted values (ranging
+    from approximately -5 to 50) on the x-axis and residuals (observed minus predicted,
+    ranging from about -10 to 10) on the y-axis. The title reads "residual-plot ·
+    matplotlib · pyplots.ai" in large font at the top. Regular residual points are
+    shown as blue circles (#306998) with white edges, while outliers exceeding 2 standard
+    deviations are highlighted in yellow (#FFD43B) with blue edges. A solid black
+    horizontal reference line at y=0 represents perfect predictions. Dashed gray horizontal
+    lines mark the ±2σ boundaries, with a light blue shaded band between them. A red
+    polynomial trend line curves through the data, showing a clear U-shaped pattern
+    indicating the non-linearity that the linear model fails to capture. The legend
+    is positioned in the upper left corner with clear labels for all elements.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (s=150) for 150 data points with good
+          alpha (0.7). Within the guideline range (100-200 for 100-300 points) but
+          on the large side; still effective
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe, good contrast between
+          regular points and outliers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Fitted Values" and "Residuals (Observed - Predicted)" are descriptive
+          but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend well-placed in upper left with
+          good framealpha
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot with residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows fitted values, Y-axis shows residuals (y_true - y_pred)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal reference line at y=0, alpha
+          transparency, outlier coloring (>2σ), ±2σ bands, trend/smoothing line to
+          detect patterns'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe all plot elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "residual-plot · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Excellent demonstration: shows non-linear pattern in residuals (U-shape
+          from quadratic component in true data), outliers visible (4-5 yellow points),
+          heteroscedasticity somewhat visible. Could show more varied outlier distribution'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic regression scenario with simulated linear model fit to
+          data with quadratic component
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable; fitted values 0-50 and residuals ±10 are plausible
+          for regression diagnostics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of matplotlib's axhline, fill_between, scatter with styling,
+          and polyfit for trend line. Could leverage more advanced features like subplot
+          with marginal distributions or custom tick formatting
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/plotly.yaml b/plots/residual-plot/metadata/plotly.yaml
index 67d7791c4c..f6fbdf7ea6 100644
--- a/plots/residual-plot/metadata/plotly.yaml
+++ b/plots/residual-plot/metadata/plotly.yaml
@@ -28,3 +28,185 @@ review:
   - Axis labels lack units (though residuals are unitless, y-axis label is verbose)
   - Moving average smoothing could benefit from using a more sophisticated LOWESS
     approach for smoother trend detection
+  image_description: The plot displays a residual plot with fitted values on the x-axis
+    (ranging from approximately -1 to 30) and residuals on the y-axis (ranging from
+    approximately -6 to 5). The main data points are shown as blue circular markers
+    with semi-transparency. Outliers (points beyond ±2 standard deviations) are highlighted
+    as yellow/gold diamond-shaped markers. The plot includes a solid black horizontal
+    reference line at y=0, dashed yellow/gold lines at ±2 standard deviations (~±3.8),
+    and a red trend line (smoothing curve) that shows slight curvature from the left
+    side before flattening out. The title reads "residual-plot · plotly · pyplots.ai"
+    centered at the top. The legend is positioned in the upper-left corner with a
+    white background, showing all trace types. The overall layout uses a clean white
+    background with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers are well-sized (14px) with good alpha (0.7), outliers slightly
+          larger (16px) with distinct diamond shape. Minor deduction: some data points
+          cluster but remain distinguishable'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/red color scheme is colorblind-safe; good contrast against
+          white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+          in upper-left
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Fitted Values", "Residuals (y_true - y_pred)")
+          but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend well-placed with background; however,
+          legend shows both +2 SD and -2 SD as separate entries which is slightly
+          redundant
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot type showing residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis = fitted values, Y-axis = residuals, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has: horizontal reference line at y=0, alpha transparency, smoothing
+          line, ±2 SD bands, outliers colored differently. Minor: could show pattern
+          detection more clearly'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend labels are accurate but +2 SD and -2 SD could be combined
+          into a single "±2 SD" entry
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "residual-plot · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows residuals with variation, outliers both positive and negative,
+          visible pattern from non-linear data (captured by smoothing line curving
+          at left), demonstrates heteroscedasticity potential
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic linear regression scenario with intentional slight non-linearity
+          to show interesting patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 150 samples, residuals in reasonable range, sensible fitted values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Code is current, but `line=dict(width=1, color=...)` inside marker
+          dict could be cleaner
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Graph Objects with hover templates for interactivity, exports
+          both PNG and HTML. Could leverage more Plotly features like annotations
+          or rangeslider
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/plotnine.yaml b/plots/residual-plot/metadata/plotnine.yaml
index 55aab66de9..e3727254f3 100644
--- a/plots/residual-plot/metadata/plotnine.yaml
+++ b/plots/residual-plot/metadata/plotnine.yaml
@@ -25,3 +25,179 @@ review:
     Response" or add domain context)
   - Data is somewhat generic - could use a more realistic scenario with meaningful
     variable names instead of abstract X/y_true
+  image_description: 'The plot displays a residual plot with fitted values (ranging
+    from ~3 to ~23) on the x-axis and residuals (ranging from ~-7 to ~8) on the y-axis.
+    The title "residual-plot · plotnine · pyplots.ai" is clearly visible at the top.
+    Points are colored by type: blue (#306998) for "Normal" points and red/coral (#E74C3C)
+    for "Outlier" points. A dark blue horizontal reference line at y=0 represents
+    perfect predictions. Two gray dashed horizontal lines mark the ±2 standard deviation
+    boundaries for outlier detection. A yellow LOWESS smoothing line runs through
+    the data near zero, showing no strong systematic pattern. The legend labeled "Point
+    Type" appears on the right side. The background uses theme_minimal with subtle
+    gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=4) with good alpha (0.7) for 150 points;
+          slight deduction as some points at the boundaries are very close
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and red distinction is colorblind-safe; outliers clearly stand
+          out
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 canvas, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Fitted Values" and "Residuals" are descriptive but lack units (1
+          point)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend title says "Point Type" (should match data semantics better);
+          grid is mostly subtle but still slightly too prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot with scatter points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on x-axis, residuals on y-axis as per spec
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has horizontal reference line at y=0, LOWESS smoothing line, ±2σ
+          bands, outlier coloring - all spec features present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly distinguishes Normal vs Outlier
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "residual-plot · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows outliers (4 points beyond ±2σ), random scatter around zero
+          indicating good fit, slight pattern from LOWESS; could show more heteroscedasticity
+          to demonstrate that use case
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Linear regression with synthetic data is plausible; slightly generic
+          (no real-world variable names)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Residuals ranging ±7 with std ~1.5 is realistic for regression diagnostics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_point, geom_hline, geom_smooth,
+          scale_color_manual, and theme customization; could leverage more plotnine-specific
+          features like faceting or stat_smooth variants
+  verdict: APPROVED
diff --git a/plots/residual-plot/metadata/seaborn.yaml b/plots/residual-plot/metadata/seaborn.yaml
index 0d349cafcb..8e11fe1321 100644
--- a/plots/residual-plot/metadata/seaborn.yaml
+++ b/plots/residual-plot/metadata/seaborn.yaml
@@ -26,3 +26,177 @@ review:
     for this plot type
   - The Trend label in regplot line_kws does not appear in the legend (seaborn regplot
     does not add to legend automatically)
+  image_description: The plot shows a residual plot for housing price prediction.
+    Blue scatter points (labeled "Normal") represent residuals within 2 standard deviations,
+    while yellow points (labeled "Outlier (|z| > 2)") mark outliers beyond that threshold.
+    A solid black horizontal reference line at y=0 divides the plot. Gray dashed horizontal
+    lines mark the ±2 SD boundaries (~±55,000). A red curved polynomial trend line
+    (order=2) with a pink confidence band shows the U-shaped pattern in residuals,
+    revealing non-linearity in the underlying model. The x-axis shows "Fitted Values
+    (Predicted Price in $)" ranging from ~100,000 to ~600,000, and the y-axis shows
+    "Residuals (Actual - Predicted)" ranging from ~-60,000 to ~80,000. The title correctly
+    follows the format "residual-plot · seaborn · pyplots.ai". The legend in the upper
+    right shows "Normal", "Outlier (|z| > 2)", and "±2 SD". A subtle grid is visible
+    in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at s=150 with alpha=0.7 appropriate for 150 points, though
+          slightly large
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Fitted Values (Predicted Price in $)" and
+          "Residuals (Actual - Predicted)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend omits the "Trend" entry (regplot label is not added to the
+          legend); ±2 SD appears but the trend line is unlabeled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct residual plot showing residuals vs fitted values
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Fitted values on x-axis, residuals on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Reference line at y=0, outlier highlighting, trend line, ±2 SD bands
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Normal vs Outlier points
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "residual-plot · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows non-linear pattern (U-shape curve), outliers, variance in residuals
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Housing price prediction is a real, comprehensible regression scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: House prices $100K-600K and residuals ±60K are realistic for housing
+          data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot and sns.regplot which are seaborn functions,
+          but regplot is used only for the trend line rather than its main purpose
+          (regression visualization). The implementation could leverage seaborn's
+          residplot function which is specifically designed for residual plots.
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/altair.yaml b/plots/ridgeline-basic/metadata/altair.yaml
index 1f3b4da95b..6202b0353a 100644
--- a/plots/ridgeline-basic/metadata/altair.yaml
+++ b/plots/ridgeline-basic/metadata/altair.yaml
@@ -26,3 +26,179 @@ review:
   - Grid lines extend across the full width creating slight visual noise on the right
     side where there is no data
   - Grid could be disabled entirely as it does not add value to this visualization
+  image_description: The plot displays a ridgeline visualization showing monthly temperature
+    distributions for 12 months (January through December). Each month is represented
+    as a separate row with a blue area density curve. The ridges use a gradient blue
+    color scheme (darker blues for winter months at top, transitioning through the
+    year). The curves overlap slightly vertically, creating the characteristic "mountain
+    ridge" appearance. The x-axis shows "Temperature (°C)" ranging from -14 to 40,
+    and each ridge is labeled with its 3-letter month abbreviation on the left. The
+    title reads "Monthly Temperature Distribution · ridgeline-basic · altair · pyplots.ai"
+    at the top. The seasonal temperature pattern is clearly visible - winter months
+    (Jan, Feb, Dec) peak around 2-6°C, while summer months (Jun, Jul, Aug) peak around
+    22-25°C. Dark blue outlines define each ridge, and the fill has good transparency
+    (0.8 opacity).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, month labels well-positioned and readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized, opacity and stroke create clear visual
+          distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues color scheme is colorblind-safe sequential palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with ridges filling canvas well, slight issue with excess
+          whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'X-axis has descriptive label with units: "Temperature (°C)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is visible but very subtle (0.3 opacity), no legend needed for
+          this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot with stacked density curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature on X-axis, groups (months) on Y/rows correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: overlapping curves, vertical stacking,
+          color differentiation, meaningful ordering (chronological)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range [-15, 40] shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; month labels serve the purpose
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Monthly Temperature Distribution · ridgeline-basic
+          · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: varying distribution widths (winter more variable),
+          seasonal progression, different peak locations'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature data for Northern Hemisphere - very realistic
+          and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Temperature values realistic: ~2°C in Jan, ~25°C in Jul, appropriate
+          standard deviations'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair''s declarative grammar: transform_density,
+          faceting with negative spacing for overlap, monotone interpolation'
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/bokeh.yaml b/plots/ridgeline-basic/metadata/bokeh.yaml
index 277ed6358c..04af0541c0 100644
--- a/plots/ridgeline-basic/metadata/bokeh.yaml
+++ b/plots/ridgeline-basic/metadata/bokeh.yaml
@@ -24,3 +24,179 @@ review:
   - Grid styling could include y-grid at very low alpha for visual reference
   - The output_file and save calls are somewhat redundant when the primary output
     is PNG via export_png
+  image_description: 'The plot displays a ridgeline/joy plot showing monthly temperature
+    distributions across all 12 months (January at top to December at bottom). Each
+    ridge shows a density curve with the horizontal axis representing Temperature
+    (°C) ranging from approximately -5 to 40. The colors transition from deep blue
+    (cold months: Jan, Feb, Dec) through cyan/teal (Mar, Apr, Nov), to green (May),
+    lime-yellow (Jun), bright yellow (Jul), orange (Aug), and salmon-peach (Sep, Oct).
+    The ridges overlap appropriately creating the signature mountain-range appearance.
+    The title "ridgeline-basic · bokeh · pyplots.ai" appears at the top left. The
+    Y-axis shows "Month" label with abbreviated month names as tick labels. The background
+    is a light gray (#FAFAFA). Vertical dashed grid lines are visible at regular temperature
+    intervals.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable but at smaller apparent size due
+          to 4800x2700 canvas; slightly smaller than optimal
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, ridges overlap appropriately as intended for the
+          plot type
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized and clearly visible with good fill
+          opacity (0.85)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color gradient that is generally colorblind-friendly, though
+          the green-yellow transition may be challenging for some
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" with units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Y-grid is disabled (alpha=0) and X-grid is subtle (dashed, alpha 0.3),
+          which is fine; no legend is needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature on X, months on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Partial overlap (~60-65%), vertical stacking, color differentiation
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within -5 to 40°C range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; month labels serve this purpose
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "ridgeline-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal temperature variation with different means and similar
+          spreads; could show more variety in distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature data is a perfect real-world scenario, values
+          are plausible for a temperate climate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values realistic (5-27°C base means), though winter months
+          might be slightly warm for some climates
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.html and plot.png; output_file import unused in PNG
+          context
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's patch glyph with categorical y-axis offsets, which is
+          a reasonable approach; could leverage ColumnDataSource more
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/highcharts.yaml b/plots/ridgeline-basic/metadata/highcharts.yaml
index ab24518ca2..76b5761d9b 100644
--- a/plots/ridgeline-basic/metadata/highcharts.yaml
+++ b/plots/ridgeline-basic/metadata/highcharts.yaml
@@ -29,3 +29,190 @@ review:
   - Grid lines extend fully across the plot area - could be more subtle or limited
     to below the ridges
   - Right margin appears slightly larger than necessary, creating minor layout asymmetry
+  image_description: 'The plot displays a ridgeline visualization of monthly temperature
+    distributions. The chart shows 12 partially overlapping density curves stacked
+    vertically, one for each month from January (bottom) to December (top). The y-axis
+    displays month names as labels. The x-axis shows "Temperature (°C)" ranging from
+    -10 to 35. Colors transition from cool blue (winter months: January, February,
+    December) through greens (spring/fall: March, October, November) to warm yellows
+    and orange (summer months: May-September). Each density curve has a gradient fill
+    that fades to transparent at the baseline. The title reads "Monthly Temperatures
+    · ridgeline-basic · highcharts · pyplots.ai". The layout uses a white background
+    with subtle vertical grid lines. The distributions clearly show seasonal patterns
+    - winter months peak around 2-8°C while summer months peak around 17-24°C.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and month names are all clearly readable at full
+          size. Font sizes are appropriate for the 4800x2700 resolution.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap. Ridge overlaps are intentional and well-controlled
+          (~50-70% as specified).
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are clearly visible with appropriate line widths (3px)
+          and fill opacity (0.7).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette transitioning from blue to yellow/orange.
+          Avoids red-green conflicts.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good overall layout with appropriate margins. Minor deduction: slight
+          asymmetry with more whitespace on the right side.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Temperature (°C)". Y-axis
+          displays month names as required by spec.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1). Legend disabled as appropriate for this
+          plot type since month labels are on y-axis. However, some grid lines extend
+          through the entire plot area which slightly distracts.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot implementation using stacked area charts
+          with density curves.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values on x-axis, groups (months) stacked vertically
+          on y-axis.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: partial overlap (~70%), vertical stacking,
+          group labels on y-axis, meaningful color gradient.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (-10 to 35) appropriately captures all temperature distributions.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled appropriately; month labels serve as the legend via
+          y-axis positioning.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows required format: "Monthly Temperatures · ridgeline-basic
+          · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows seasonal progression well with varying means and spreads.
+          Summer months have narrower distributions (lower std), winter months have
+          wider spreads. Minor deduction: distributions are fairly similar in shape.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real scenario using monthly temperature data - matches the spec's
+          first application example perfectly.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (January ~2°C, July ~24°C) representing
+          typical temperate climate.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Contains a KDE function definition which deviates from pure imports→data→plot→save
+          structure. However, it's justified since highcharts-core doesn't have built-in
+          KDE.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data generation.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports are used: numpy, highcharts classes, selenium, tempfile,
+          time, urllib, pathlib.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of AreaSeries with gradient fills, threshold baselines,
+          JavaScript formatter for y-axis labels. Uses Highcharts' strength in creating
+          interactive HTML output alongside PNG.
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/letsplot.yaml b/plots/ridgeline-basic/metadata/letsplot.yaml
index aadecf6021..7ad3611ddb 100644
--- a/plots/ridgeline-basic/metadata/letsplot.yaml
+++ b/plots/ridgeline-basic/metadata/letsplot.yaml
@@ -25,3 +25,176 @@ review:
   - Vertical grid lines could be made subtler or removed for a cleaner ridgeline aesthetic
   - Color palette (Spectral) is not fully colorblind-accessible; viridis or a sequential
     palette could improve accessibility
+  image_description: The plot displays a ridgeline visualization showing monthly temperature
+    distributions arranged vertically from January at the top to December at the bottom.
+    The distributions are rendered as smooth density curves with partial overlap,
+    creating the characteristic "mountain ridge" appearance. The Spectral color palette
+    is used, transitioning from purple/magenta tones for winter months (January, February,
+    December) through blue (March), teal/green (April, May), yellow-green (June, July,
+    August), and orange/coral (September, October, November). Each ridge has a white
+    border outline and semi-transparent fill. The x-axis shows "Temperature (°C)"
+    ranging from approximately -14 to 32, and month names are displayed on the y-axis.
+    The title reads "Monthly Temperature Distribution · ridgeline-basic · letsplot
+    · pyplots.ai" at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and month names are all clearly readable at appropriate
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; ridges overlap intentionally as per spec
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized with appropriate alpha for visual clarity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Spectral palette provides good differentiation but not fully colorblind-optimized
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight margin imbalance on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Temperature (°C)" with units, Y-axis appropriately shows
+          month names
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend correctly hidden (month labels sufficient), but vertical grid
+          lines could be subtler
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature on X, months as categorical Y creating ridges
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vertical stacking with overlap (scale=1.2), color differentiation,
+          ordered groups
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full temperature range visible for all months
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden appropriately since Y-axis labels identify groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "ridgeline-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation well; winter months have wider distributions
+          (higher std), summer months are tighter
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Northern hemisphere monthly temperatures are a perfect, relatable
+          example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature ranges are realistic; some slight extreme values visible
+          due to normal distribution tails
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_area_ridges which is lets-plot specific, scale_fill_brewer,
+          theme customization; could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/matplotlib.yaml b/plots/ridgeline-basic/metadata/matplotlib.yaml
index 4fc4f68c40..9a8abe1b89 100644
--- a/plots/ridgeline-basic/metadata/matplotlib.yaml
+++ b/plots/ridgeline-basic/metadata/matplotlib.yaml
@@ -25,3 +25,162 @@ review:
   - Manual KDE implementation using list comprehension is less efficient than using
     scipy.stats.gaussian_kde
   - Grid could be slightly more subtle (alpha=0.2 instead of 0.3)
+  image_description: 'The plot displays a beautiful ridgeline visualization with 12
+    overlapping density curves representing monthly temperature distributions. The
+    months are arranged vertically from January at the top to December at the bottom.
+    A viridis color gradient is used: purple/dark blue for cold winter months (January,
+    February) transitioning through blue and teal to bright yellow-green for warmer
+    months (July, August). Each ridge shows a smooth bell-shaped density distribution.
+    Winter months peak around 0-5°C while summer months peak around 25°C, showing
+    a realistic Northern Hemisphere seasonal pattern. The x-axis shows ''Temperature
+    (°C)'' ranging from -10 to 40. Month names are clearly displayed as y-axis labels.
+    The title ''ridgeline-basic · matplotlib · pyplots.ai'' is prominently displayed.
+    The ridges have ~60% overlap creating the characteristic Joy Plot appearance.
+    A subtle dashed grid appears on the x-axis only.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at fontsize=24, xlabel at fontsize=20, y-tick labels at fontsize=14,
+          x-ticks at fontsize=16 - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; month labels are well-spaced and density curves
+          overlap intentionally as per ridgeline design
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized with scale=2.5, alpha=0.8 provides
+          excellent visibility while maintaining overlap aesthetics
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis colormap which is colorblind-safe and has excellent
+          perceptual uniformity
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'X-axis has descriptive label with units: ''Temperature (°C)'''
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/Joy Plot type with vertically stacked density curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis density, groups (months) to y-axis
+          positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: partial overlap (~60%), vertical stacking,
+          group labels on y-axis, color differentiation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (-10 to 40°C) shows all data properly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; month labels serve as group identifiers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: ''ridgeline-basic · matplotlib · pyplots.ai'''
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows seasonal variation with clear difference between winter/summer,
+          spring/fall transition months have realistic intermediate values and higher
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature distributions for the Northern Hemisphere are
+          an excellent, comprehensible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Temperature values are realistic: ~2-4°C in winter, ~22-25°C in
+          summer, with appropriate standard deviations'
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions or
+          classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/plotly.yaml b/plots/ridgeline-basic/metadata/plotly.yaml
index 3155cb1016..7a5114fa76 100644
--- a/plots/ridgeline-basic/metadata/plotly.yaml
+++ b/plots/ridgeline-basic/metadata/plotly.yaml
@@ -25,3 +25,178 @@ review:
   weaknesses:
   - No legend explaining the color scale (cold→warm color mapping)
   - HTML output is generated but not strictly required for review
+  image_description: 'The plot displays a ridgeline visualization of monthly temperature
+    distributions. There are 12 vertically stacked density curves, one for each month
+    from January (bottom) to December (top). The color scheme transitions from deep
+    blue (Python Blue #306998) for winter months (January, February, December) through
+    lighter blues for transitional months, to warm yellow (#FFD43B) for June and orange/coral
+    tones for May, July, and August. The x-axis shows "Temperature (°C)" ranging from
+    -10 to 40, with clear tick marks. Month names are displayed on the y-axis as labels.
+    The title "ridgeline-basic · plotly · pyplots.ai" is centered at the top. Each
+    density curve shows smooth Gaussian-like distributions with appropriate overlap
+    (~50%), creating the characteristic ridge mountain appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlaps; month labels well-spaced; density curves overlap
+          intentionally as per spec
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Density curves are well-sized and visible; slight deduction as some
+          curves could have slightly more distinct edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-friendly; no red-green reliance
+          for differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; left margin accommodates
+          month labels
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis shows "Temperature (°C)" with proper units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but no legend present (not strictly needed for labeled
+          ridgelines, but could help with color interpretation)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot type with stacked density curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values mapped to X, months create vertical stacking
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: partial overlap (~50%), vertical stacking,
+          color differentiation, meaningful order (chronological)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (-15 to 40) covers all temperature data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Month labels serve as legend; accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "ridgeline-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows seasonal pattern with cold winters, warm summers, and transitional
+          months with more variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Northern hemisphere temperature pattern is realistic; slight deduction
+          as specific city context could be clearer
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (-2°C to 26°C base temps with appropriate
+          standard deviations)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also saves plot.html (not wrong, just extra)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Plotly graph_objects
+        score: 3
+        max: 5
+        passed: true
+        comment: Correctly uses go.Scatter with fill and custom hover templates; could
+          leverage more Plotly-specific features like animation or rangeslider
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/plotnine.yaml b/plots/ridgeline-basic/metadata/plotnine.yaml
index 6abfb73e6f..4636843e98 100644
--- a/plots/ridgeline-basic/metadata/plotnine.yaml
+++ b/plots/ridgeline-basic/metadata/plotnine.yaml
@@ -27,3 +27,185 @@ review:
     horizontal lines at y=0 for each month)
   - Manual KDE computation using scipy.stats rather than leveraging plotnine's native
     density statistics
+  image_description: 'The plot displays a ridgeline/joy plot showing monthly temperature
+    distributions throughout the year. The plot uses a cool-to-warm color gradient:
+    winter months (Jan, Feb, Dec) appear in deep blue (#306998), transitioning through
+    teals and greens for spring months (Mar-May), to golden yellows (#FFD43B) for
+    summer months (Jul-Aug), and back through amber/ochre tones for fall (Sep-Nov).
+    Each month is represented as a smoothly overlapping density curve stacked vertically,
+    creating the characteristic "mountain ridge" appearance. The x-axis shows Temperature
+    (°C) ranging from -10 to 40, and the y-axis displays month labels (Jan through
+    Dec). The title correctly reads "ridgeline-basic · plotnine · pyplots.ai". The
+    plot has a clean minimal theme with subtle gray grid lines only on the x-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (~24pt), axis labels are appropriately sized
+          (~20pt), tick labels are readable (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, month labels are cleanly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized with appropriate overlap (~60%), ribbon
+          fills with alpha=0.85 and dark outlines make each distribution clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color gradient from blue to yellow, though the progression relies
+          somewhat on hue distinctions; colorblind users might struggle differentiating
+          some adjacent months
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills the canvas well with balanced margins, 16:9 aspect ratio
+          properly utilized
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Temperature (°C)" with units, Y-axis has "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Horizontal grid lines are removed (good) but there are visible baseline
+          lines at y=0 for each ridge that create visual noise; legend appropriately
+          hidden since colors are self-explanatory through month labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = temperature values, Y = grouped by month with vertical stacking
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Overlapping density curves (~60% overlap), meaningful ordering (chronological),
+          color differentiation, Y-axis shows group labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full temperature range shown (-10 to 40°C) capturing all distributions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; month labels serve as identification
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "ridgeline-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation excellently with different means and spreads;
+          winter months are colder with moderate spread, summer months warmer with
+          tighter distributions. Could show more variance in distribution shapes (e.g.,
+          bimodality)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature distribution is a perfect real-world application,
+          temperatures are seasonally appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values are realistic for a temperate climate; range of
+          -10 to 40 covers the data well though some summer peaks extending past 35°C
+          are slightly high
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → density computation → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, plotnine components, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" correctly, and the verbose=False parameter is good
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of ggplot grammar with geom_ribbon for ridges, scale_fill_manual
+          for custom colors, theme customization. However, doesn't use plotnine's
+          native stat_density or geom_density_ridges (if available via extension),
+          instead manually computes KDE with scipy
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/pygal.yaml b/plots/ridgeline-basic/metadata/pygal.yaml
index 202bfa51a1..5fc77963fc 100644
--- a/plots/ridgeline-basic/metadata/pygal.yaml
+++ b/plots/ridgeline-basic/metadata/pygal.yaml
@@ -24,3 +24,184 @@ review:
   - Vertical grid lines slightly prominent; could use lower alpha for subtler appearance
   - Does not leverage distinctive pygal features like built-in interactivity or tooltips
     in the HTML output
+  image_description: 'The plot displays a ridgeline visualization of monthly temperature
+    distributions. There are 12 semi-transparent, overlapping density curves stacked
+    vertically, one for each month (January at top, December at bottom). The curves
+    use a seasonal color gradient: cold months (Jan, Feb, Dec) in blue, spring months
+    transitioning through cyan/teal and green, summer months (Jun-Aug) in yellow/orange,
+    and fall months cycling back through green to blue. The X-axis shows "Temperature
+    (°C)" ranging from -10 to 40. Month labels (Jan through Dec) are displayed on
+    the Y-axis. The title reads "Monthly Temperature Distribution · ridgeline-basic
+    · pygal · pyplots.ai". The layout shows good overlap (~60%) between ridges, creating
+    the characteristic mountain-ridge appearance. Background is white with subtle
+    vertical grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and month labels are all clearly readable with
+          appropriate font sizes for 4800×2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; month labels are well-separated and density curves
+          overlap intentionally as per spec
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized with good opacity (0.85), distinct
+          shapes visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent color gradient from blue through cyan, green, yellow to
+          orange; colorblind-safe progression
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; minor deduction for slightly more whitespace
+          on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Temperature (°C)" with units; Y-axis has month labels
+          as per spec
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines could be more subtle (currently same alpha as in style);
+          no legend needed per spec but vertical guides are slightly prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot with stacked density curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values correctly mapped to X-axis, months as groups on
+          Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Partial overlap (~60%), color gradient for seasons, month labels
+          on Y-axis all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (-10 to 40°C) covers all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; month labels serve as identification per spec guidance
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Monthly Temperature Distribution · ridgeline-basic
+          · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows seasonal variation with winter months centered around 0-5°C
+          and summer months around 20-25°C; different spreads visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Monthly temperature data is a classic real-world use case; plausible
+          values for temperate climate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range (-10 to 40°C) and monthly distributions are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → KDE computation → chart
+          creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported; all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses pygal.XY with fill=True creatively to simulate ridgeline, but
+          pygal doesn't have native ridgeline support. This is a clever workaround
+          using polygon construction, but doesn't showcase distinctive pygal features
+          like built-in chart types or interactivity.
+  verdict: APPROVED
diff --git a/plots/ridgeline-basic/metadata/seaborn.yaml b/plots/ridgeline-basic/metadata/seaborn.yaml
index ad762ad26a..d766316ffa 100644
--- a/plots/ridgeline-basic/metadata/seaborn.yaml
+++ b/plots/ridgeline-basic/metadata/seaborn.yaml
@@ -27,3 +27,182 @@ review:
     structure preference
   - Could benefit from slightly more variation in distribution spread between months
     to showcase the full capability of ridgeline plots
+  image_description: The plot displays a ridgeline/joy plot showing monthly temperature
+    distributions across all 12 months. The layout is vertical with December at the
+    top and January at the bottom. The density curves have partial overlap creating
+    the characteristic "mountain ridge" appearance. The color scheme uses a coolwarm
+    palette - cold months (December, November, January, February) appear in blue shades
+    while warm months (June, July, August) appear in red/coral shades. Month names
+    are displayed as bold labels on the left side, matching the color of their respective
+    ridge. The x-axis shows "Temperature (°C)" ranging from approximately -10 to 30.
+    The title "ridgeline-basic · seaborn · pyplots.ai" is prominently displayed at
+    the top. Each ridge has a white outline and the distributions clearly show seasonal
+    temperature patterns - winter months centered around 0-5°C and summer months around
+    20-25°C.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at ~26pt, month labels at ~20pt bold, axis labels at ~22pt,
+          tick labels at ~18pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, month labels well-spaced, ridges overlap appropriately
+          by design
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Density curves are well-sized with appropriate alpha (0.8), white
+          outlines enhance visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Coolwarm palette provides good differentiation; blue-to-red gradient
+          is colorblind-friendly for sequential data
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins, plot fills appropriate
+          portion of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has "Temperature (°C)" with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid (appropriate for this plot type), but no legend; month labels
+          serve as legend which is acceptable for ridgeline plots
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ridgeline/joy plot with stacked density curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature as continuous variable, months as categorical groups
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vertical stacking with ~50% overlap, group labels displayed, color
+          gradient applied, meaningful ordering (chronological)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full temperature range displayed (-10 to 30°C)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Month labels are accurate and color-coded
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "ridgeline-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation well, different distribution widths/centers;
+          could show more variation in spread between months
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly temperature data is a classic application mentioned in spec,
+          values are realistic for temperate climate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (winter ~2-4°C, summer ~21-24°C
+          base temps, 3.5°C standard deviation)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear flow but has one small helper function `label` for
+          text positioning
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses FacetGrid effectively for ridgeline construction, kdeplot for
+          density estimation, coolwarm palette. However, this is a fairly standard
+          seaborn ridgeline pattern rather than showcasing unique features.
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/altair.yaml b/plots/roc-curve/metadata/altair.yaml
index 2071d32c37..480635ac5e 100644
--- a/plots/roc-curve/metadata/altair.yaml
+++ b/plots/roc-curve/metadata/altair.yaml
@@ -24,3 +24,180 @@ review:
   - Y-axis has excessive tick density making it appear cluttered
   - No interactivity or tooltips added despite Altair's strength in this area
   - Grid lines could be styled more subtly with smaller dash pattern
+  image_description: The plot displays two ROC curves comparing classifier performance.
+    A blue solid line represents the "Good Model" (AUC = 0.96) that rises steeply
+    from the origin and hugs the top-left corner, indicating excellent classification
+    performance. A yellow/gold solid line shows the "Moderate Model" (AUC = 0.71)
+    with a more gradual curve. A gray dashed diagonal line represents the random classifier
+    baseline (AUC = 0.50). The title "roc-curve · altair · pyplots.ai" appears at
+    the top. The X-axis is labeled "False Positive Rate" (0 to 1), and the Y-axis
+    is labeled "True Positive Rate" (0 to 1). A legend in the lower right clearly
+    identifies all three lines with their AUC values. The plot uses a square 1:1 aspect
+    ratio with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend text are all clearly
+          readable with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are well-sized with strokeWidth=4 for model curves and 3 for
+          diagonal; however the dense tick labels on y-axis make it slightly busy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe; gray diagonal is appropriately muted
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square format is appropriate for ROC curves; plot fills canvas well
+          but legend positioning could be slightly better integrated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "False Positive Rate" and "True Positive Rate"
+          (no units needed for rates 0-1)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is placed in lower-right but overlaps slightly with the plot
+          area. Grid is subtle with alpha 0.3.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization with TPR vs FPR
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=FPR, Y=TPR correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line present, AUC displayed in legend, multiple
+          models compared with distinct colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from 0 to 1 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels are accurate with AUC scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "roc-curve · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two models with different performance levels plus random baseline;
+          demonstrates full ROC curve behavior from (0,0) to (1,1)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic model comparison scenario is plausible; could benefit from
+          a more concrete domain context (e.g., medical diagnosis)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values correctly in 0-1 range; AUC values are realistic (0.96
+          good, 0.71 moderate)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data generation → ROC computation →
+          plotting → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses np.trapezoid which is correct for newer numpy versions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair declarative encoding, layered charts, and configure methods
+          correctly but does not leverage interactive features or tooltips that make
+          Altair distinctive
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/bokeh.yaml b/plots/roc-curve/metadata/bokeh.yaml
index 88178288d1..22bbe91d77 100644
--- a/plots/roc-curve/metadata/bokeh.yaml
+++ b/plots/roc-curve/metadata/bokeh.yaml
@@ -25,3 +25,179 @@ review:
     the plot area or with more padding
   - Does not utilize Bokeh's interactive features like HoverTool to show threshold
     values on hover
+  image_description: 'The plot displays an ROC curve visualization on a square canvas
+    with a light gray (#FAFAFA) background. Three ROC curves are shown: a blue curve
+    (Random Forest, AUC=0.87) that bows strongly toward the top-left indicating good
+    performance, a yellow/gold curve (Logistic Regression, AUC=0.74) with moderate
+    curvature, and a red curve (Decision Tree, AUC=0.62) closer to the diagonal. A
+    gray dashed diagonal line represents the random classifier baseline. The title
+    "roc-curve · bokeh · pyplots.ai" appears centered at the top. Both axes are labeled
+    descriptively ("False Positive Rate" and "True Positive Rate") and range from
+    0 to 1. The legend is positioned in the bottom-right corner showing all four items
+    with their AUC scores. Grid lines are subtle with dashed styling.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt, legend at
+          20pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend positioned away from curves
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 5 provides excellent visibility, curves clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red provide good distinction; yellow might be slightly
+          light but still distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Square format is appropriate for ROC curves; however legend placement
+          in extreme bottom-right corner creates some visual imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("False Positive Rate", "True Positive Rate")
+          but lack units (though rates are unitless, could add "(0-1)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed) which is good; legend text is
+          readable but could be better positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on X-axis, TPR on Y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal reference line, AUC scores in legend, multiple
+          models with distinct colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from 0 to 1 as required
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly match curves with accurate AUC values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows exact format "roc-curve · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three classifiers with varying performance levels demonstrating
+          the full range of ROC behavior; could show a near-perfect classifier for
+          completeness
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses realistic ML model names (Random Forest, Logistic Regression,
+          Decision Tree) with plausible AUC values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AUC values are realistic (0.62-0.87); the mathematical model used
+          (t^k) produces valid ROC curves
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports are used, but Legend import could be combined
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Legend model, and export_png/save for HTML;
+          however doesn't leverage Bokeh's interactive features like HoverTool which
+          would enhance ROC curve exploration
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/highcharts.yaml b/plots/roc-curve/metadata/highcharts.yaml
index f7ddc3e947..113d9c53ca 100644
--- a/plots/roc-curve/metadata/highcharts.yaml
+++ b/plots/roc-curve/metadata/highcharts.yaml
@@ -26,3 +26,179 @@ review:
     on the curve
   - Example data shows only one high-performing classifier; could include comparison
     with a second model to better demonstrate ROC curve usage
+  image_description: 'The plot shows a ROC curve on a white background. The main ROC
+    curve is displayed as a blue area chart (Python Blue #306998) with a gradient
+    fill from darker blue at the top to lighter blue at the bottom, representing the
+    area under the curve. The curve shows excellent classifier performance, starting
+    near the top-left and reaching the top quickly. A dashed olive/yellow diagonal
+    line represents the random classifier reference (y=x). The title "roc-curve ·
+    highcharts · pyplots.ai" appears at the top in bold. The subtitle "Binary Classifier
+    Performance" is shown below. The X-axis is labeled "False Positive Rate" (0 to
+    1), Y-axis is labeled "True Positive Rate" (0 to 1). A legend in the bottom-right
+    shows "ROC Curve (AUC = 0.967)" and "Random Classifier (AUC = 0.5)". Grid lines
+    are subtle light gray. The layout fills the canvas well with proper margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, axis labels at 36px, tick labels at 28px, all perfectly
+          readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: ROC curve line is thick (6px), diagonal line visible with dashed
+          style, gradient fill clearly shows AUC area
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and olive/yellow (#8B8000) are colorblind-safe, no
+          red-green combinations
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good margins but legend is positioned in bottom-right corner slightly
+          overlapping the "1" tick mark, could be better positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"False Positive Rate" and "True Positive Rate" are descriptive (units
+          not applicable for rates 0-1)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), but legend partially overlaps plot area
+          and the "1" tick label on x-axis
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on X-axis, TPR on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal reference line, AUC score in legend, both required
+          per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from 0 to 1 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows ROC Curve with AUC value and Random Classifier
+          reference
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "roc-curve · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows a good classifier (AUC=0.967) but doesn't show variation like
+          comparing multiple models or a poor classifier
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification scenario with beta distributions simulating
+          positive/negative class score separation is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 500 samples, 200 thresholds, AUC ~0.97 all sensible values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current numpy and highcharts APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses AreaSeries with gradient fill which is nice, but doesn't leverage
+          Highcharts' interactive features like tooltips with threshold values
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/letsplot.yaml b/plots/roc-curve/metadata/letsplot.yaml
index 310e187d8e..4e2af3f539 100644
--- a/plots/roc-curve/metadata/letsplot.yaml
+++ b/plots/roc-curve/metadata/letsplot.yaml
@@ -24,3 +24,172 @@ review:
   - Code contains a function definition (compute_roc) which violates the KISS principle
     - should be inline calculations
   - Grid alpha not explicitly set (could be more subtle)
+  image_description: |-
+    The plot displays an ROC curve visualization with three curves on a 16:9 aspect ratio canvas. The plot uses a clean minimal theme with a light gray grid. The title "roc-curve · letsplot · pyplots.ai" is displayed at the top. The X-axis shows "False Positive Rate" (0 to 1) and the Y-axis shows "True Positive Rate" (0 to 1). Three curves are shown:
+    - **Model A (AUC = 0.97)** - Blue curve (#306998) showing excellent classifier performance, hugging the top-left corner
+    - **Model B (AUC = 0.71)** - Yellow/gold curve (#FFD43B) showing moderate classifier performance
+    - **Random (AUC = 0.50)** - Gray diagonal line (#888888) representing the random baseline
+    The legend is positioned at the bottom with the label "Classifier". The plot has equal 1:1 aspect ratio for the axes (coord_fixed). All text is legible and appropriately sized. Lines are thick enough (size=2) to be clearly visible.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis titles 20pt, axis text 16pt, legend text 16pt -
+          all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line size=2 is optimal, curves clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Gray palette is colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with coord_fixed, legend at bottom well positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (FPR/TPR are unitless ratios, so
+          acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle but could be lighter (alpha not explicitly set)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on X-axis, TPR on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line present, AUC scores in legend, multiple models
+          compared with distinct colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from 0 to 1 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly show model names and AUC values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "roc-curve · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good vs moderate classifier, plus random baseline - demonstrates
+          range of performance but could show a poor classifier too
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification scenario is realistic, synthetic data from
+          beta distributions is appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AUC values plausible (0.97 is very good, 0.71 moderate) though 0.97
+          is quite high
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Has a function `compute_roc` which violates KISS principle (no functions/classes
+          rule)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current numpy trapezoid function
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (ggplot, aes, geom_line), coord_fixed for aspect
+          ratio, scale_color_manual, theme_minimal, ggsize - good library usage but
+          nothing particularly unique to lets-plot
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/matplotlib.yaml b/plots/roc-curve/metadata/matplotlib.yaml
index 230d4a799f..ce018e749e 100644
--- a/plots/roc-curve/metadata/matplotlib.yaml
+++ b/plots/roc-curve/metadata/matplotlib.yaml
@@ -27,3 +27,177 @@ review:
   - Legend placement in lower right corner slightly crowds the curve convergence area
   - Axis labels lack explicit mention that these are rates/proportions (though standard
     for ROC curves)
+  image_description: 'The plot displays three ROC curves comparing classifier performance:
+    Random Forest (blue, AUC=0.98), Logistic Regression (yellow/gold, AUC=0.91), and
+    Decision Tree (green, AUC=0.66). A dashed gray diagonal line represents the random
+    classifier baseline (AUC=0.50). The blue curve has a light blue filled area beneath
+    it. The x-axis shows "False Positive Rate" (0.0-1.0) and the y-axis shows "True
+    Positive Rate" (0.0-1.0). The title reads "roc-curve · matplotlib · pyplots.ai"
+    at the top. A legend is positioned in the lower right corner. The background has
+    a subtle grid with dashed lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths of 3.5 are excellent for this data density, curves clearly
+          distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green are distinguishable for colorblind users,
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but missing units (though rates don't typically
+          need units, dimensionless)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid at alpha=0.3 is good, but legend placed in lower right partially
+          overlaps the area where curves converge
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on X-axis, TPR on Y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line present, AUC displayed in legend, multiple
+          models with distinct colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from 0 to 1 as specified
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify models with AUC values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "roc-curve · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows excellent (0.98), good (0.91), and fair (0.66) classifiers,
+          demonstrating full range of ROC curve behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Model names (Random Forest, Logistic Regression, Decision Tree) are
+          realistic, though synthetic data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values properly bounded 0-1, AUC values are realistic for these
+          model types
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Contains two helper functions (compute_roc and compute_auc) which
+          violates the KISS principle of no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API (np.trapezoid is modern)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses fill_between for area under curve which is a nice matplotlib
+          feature, but could have used more advanced features like annotations or
+          custom styling
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/plotly.yaml b/plots/roc-curve/metadata/plotly.yaml
index 9a9e7fc761..51acb4c10d 100644
--- a/plots/roc-curve/metadata/plotly.yaml
+++ b/plots/roc-curve/metadata/plotly.yaml
@@ -27,3 +27,179 @@ review:
     computation - violates KISS principle requiring no functions/classes
   - Legend position overlaps slightly with the plot area; could be positioned outside
     or with more careful placement
+  image_description: The plot displays two ROC curves on a white background with subtle
+    gray gridlines. The blue curve (Logistic Regression, AUC = 0.97) shows excellent
+    classifier performance, hugging the top-left corner with a light blue shaded area
+    underneath. The yellow curve (Decision Tree, AUC = 0.74) shows moderate performance,
+    positioned between the blue curve and the diagonal. A dashed gray diagonal line
+    represents the random classifier baseline (y=x). The title "roc-curve · plotly
+    · pyplots.ai" is centered at the top in dark gray text. Axes are labeled "False
+    Positive Rate" (x-axis) and "True Positive Rate" (y-axis), both ranging 0-1 with
+    0.2 tick intervals. The legend is positioned in the bottom-right with a white
+    background and subtle border, showing all three elements with their labels and
+    AUC scores.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths are appropriate, curves are distinct and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, high contrast against white
+          background
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas with equal aspect ratio, but legend placement
+          creates slight asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (rates are unitless, but could note
+          "proportion" or similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), legend has good styling but slightly overlaps
+          the plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on x-axis, TPR on y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line, AUC in legend, multiple models with distinct
+          styles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes correctly range from 0 to 1
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all curves with accurate AUC values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "roc-curve · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good vs moderate classifier, diagonal baseline; could show
+          a poor classifier too
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification with Logistic Regression vs Decision Tree is
+          a realistic ML comparison scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AUC values (0.97 and 0.74) are realistic; 0.97 is slightly high for
+          typical real-world models
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Contains two helper functions (`calculate_roc` and `calculate_auc`)
+          violating KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses `go.Scatter` with fill option, interactive HTML export, proper
+          layout configuration
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/plotnine.yaml b/plots/roc-curve/metadata/plotnine.yaml
index b994c0059c..40688d653a 100644
--- a/plots/roc-curve/metadata/plotnine.yaml
+++ b/plots/roc-curve/metadata/plotnine.yaml
@@ -24,3 +24,184 @@ review:
   - Legend placement in lower-right creates slight visual imbalance; consider positioning
     at (0.7, 0.3)
   - Minor discrepancy between computed AUC values and displayed values
+  image_description: 'The plot displays two ROC curves on a square canvas with a 1:1
+    aspect ratio. The title "roc-curve · plotnine · pyplots.ai" is prominently displayed
+    at the top in bold black text. The x-axis shows "False Positive Rate" and the
+    y-axis shows "True Positive Rate", both ranging from 0.0 to 1.0 with tick marks
+    at 0.2 intervals. Two model curves are plotted: a yellow/gold curve for Random
+    Forest (AUC = 0.86) and a blue curve for Logistic Regression (AUC = 0.72). A gray
+    dashed diagonal line represents the random classifier baseline. The legend is
+    positioned in the lower-right portion of the plot with a light gray background.
+    An italicized annotation "Diagonal = Random Classifier" appears near the bottom
+    of the plot. The grid is subtle with light gray lines. The overall color scheme
+    is clean with good contrast between the two model curves.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title is large and bold, axis labels
+          are substantial, tick labels and legend text are appropriately sized'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (size=2.5) and clearly visible with good alpha (0.9),
+          perfectly adapted for the curve visualization
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and Yellow (#FFD43B) are colorblind-safe, good contrast
+          against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of square aspect ratio which is appropriate for ROC curves,
+          but the legend placement in the lower-right creates slight visual imbalance
+          with the diagonal annotation
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "False Positive Rate" and "True Positive Rate"
+          but no units (though rates are unitless, so this is acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is appropriately subtle, but legend background border is slightly
+          heavy
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on x-axis, TPR on y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diagonal reference line, AUC in legend,
+          distinct colors for models'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from 0 to 1 as required
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies models with AUC values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "roc-curve · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows two models with different AUC values demonstrating good vs
+          moderate classifiers, curves are smooth and realistic. Minor deduction:
+          could show a third model closer to random for fuller range'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Random Forest vs Logistic Regression is a classic, realistic ML comparison
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AUC values of 0.72 and 0.86 are realistic for these model types;
+          however the code computes AUC values of ~0.92 and ~0.78 but displays 0.86
+          and 0.72 - slight discrepancy in the visual
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used but imports numpy random seed when deterministic
+          generation would suffice
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Good use of plotnine''s grammar of graphics: ggplot, aes, geom_line,
+          geom_abline, coord_fixed, theme customization. Could leverage more plotnine-specific
+          features like faceting or statistical transformations'
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/pygal.yaml b/plots/roc-curve/metadata/pygal.yaml
index 61394efcbb..4847b0cb6b 100644
--- a/plots/roc-curve/metadata/pygal.yaml
+++ b/plots/roc-curve/metadata/pygal.yaml
@@ -24,3 +24,175 @@ review:
   - ROC curves for two models only; adding a third classifier would demonstrate more
     variation
   - Model A AUC of 0.98 is unrealistically high for most real-world scenarios
+  image_description: 'The plot displays an ROC curve visualization on a white background
+    with a clean layout. Three series are shown: Model A (blue line, AUC = 0.98) which
+    rises steeply near FPR=0 and plateaus near TPR=1.0, Model B (yellow line, AUC
+    = 0.72) which shows a more gradual curve, and a gray diagonal reference line for
+    Random classification (AUC = 0.50). The title "roc-curve · pygal · pyplots.ai"
+    appears at the top center. The X-axis is labeled "False Positive Rate" (0 to 1)
+    and Y-axis is labeled "True Positive Rate" (0 to 1). The legend is positioned
+    in the top-left corner outside the plot area. Grid lines are subtle and aid readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend text are all readable. Font sizes
+          are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths are appropriate; all three curves are clearly distinguishable.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and gray provide good contrast. Blue/yellow distinction
+          works for most colorblind types but not ideal for tritanopia.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend placement is
+          sensible.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("False Positive Rate", "True Positive Rate")
+          but lack units (though unitless 0-1 is standard for ROC).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and helpful; legend is functional but placed outside
+          plot area which creates some empty space in top-left.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY line chart for ROC curve visualization.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on X-axis, TPR on Y-axis as specified.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes diagonal reference line, AUC scores in legend, multiple
+          models with distinct colors.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Both axes range from 0 to 1 as required.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows model names with AUC values.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "roc-curve · pygal · pyplots.ai".'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows good vs average model comparison with clear AUC difference.
+          Could benefit from a third model to show more variation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Binary classification model comparison is a realistic ML scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: AUC values (0.98 and 0.72) are realistic. Model A at 0.98 is perhaps
+          slightly too perfect.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducible results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported; all are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart with custom Style, dashed line for reference.
+          Could leverage more pygal-specific features like tooltips or better interactivity
+          in the HTML output.
+  verdict: APPROVED
diff --git a/plots/roc-curve/metadata/seaborn.yaml b/plots/roc-curve/metadata/seaborn.yaml
index e0702cdc3f..b7e3f47147 100644
--- a/plots/roc-curve/metadata/seaborn.yaml
+++ b/plots/roc-curve/metadata/seaborn.yaml
@@ -25,3 +25,177 @@ review:
     distinctive features
   - Yellow/gold color for Logistic Regression could be slightly harder to distinguish
     for colorblind viewers
+  image_description: 'The plot displays an ROC curve visualization with three model
+    curves and a diagonal reference line. The title "roc-curve · seaborn · pyplots.ai"
+    appears at the top in bold black text. The X-axis shows "False Positive Rate (FPR)"
+    from 0.0 to 1.0, and the Y-axis shows "True Positive Rate (TPR)" from 0.0 to 1.0
+    with equal aspect ratio. Three ROC curves are displayed: Random Forest in blue
+    (AUC = 0.967), Logistic Regression in yellow/gold (AUC = 0.714), and Decision
+    Tree in red (AUC = 0.642). A gray dashed diagonal line represents the Random Classifier
+    (AUC = 0.500). The legend is positioned in the lower right corner with good readability.
+    The background uses a white grid with subtle dashed lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 26pt bold, axis labels at 22pt, tick labels at 18pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are thick (3.5 linewidth) and clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, red palette is reasonably distinguishable but yellow-gold
+          could be confused with lighter colors for some colorblind viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Equal aspect ratio, plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with abbreviations in parentheses: "False Positive
+          Rate (FPR)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is 0.3 which is good, but the grid overlays on top of
+          data lines rather than being behind them; legend is well placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ROC curve visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: FPR on X-axis, TPR on Y-axis as required
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal reference line present, AUC scores displayed in legend,
+          multiple models for comparison, axes range 0-1
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes properly show 0-1 range with small padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify models and AUC values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "roc-curve · seaborn · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows three different performance levels (good ~0.97, moderate ~0.71,
+          weak ~0.64) plus random baseline
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Realistic ML model comparison scenario (Random Forest, Logistic Regression,
+          Decision Tree)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: AUC values are realistic for the model types shown
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data generation → ROC calculation → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current numpy/seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Only uses sns.lineplot and sns.set_style/set_context. This is basic
+          seaborn usage - lineplot is not a distinctive seaborn feature. Could have
+          used seaborn's color palettes more effectively or its statistical visualization
+          features.
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/altair.yaml b/plots/rose-basic/metadata/altair.yaml
index 0c451275d3..0627f5fd39 100644
--- a/plots/rose-basic/metadata/altair.yaml
+++ b/plots/rose-basic/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
   - Month labels missing from outer edge - only values shown without category context
     on chart itself
   - Legend placed far to the right isolated from the chart rather than integrated
+  image_description: The plot displays a rose chart (Nightingale/coxcomb diagram)
+    showing monthly rainfall data in a radial format. The chart features 12 colorful
+    wedge segments representing each month, with radius proportional to rainfall values
+    (ranging from 22mm for July to 92mm for November). Colors used include Python
+    blue (Jan), yellow (Feb), teal (Mar), coral (Apr), mint (May), salmon (Jun), sage
+    (Jul), gold (Aug), sky blue (Sep), lavender (Oct), pink (Nov), and seafoam (Dec).
+    Two dashed concentric gridlines appear at approximately 75mm and 100mm levels
+    with labels on the right side. Value labels (22-92) are positioned near each segment.
+    A legend in the upper right lists all months with their corresponding colors.
+    The title "rose-basic · altair · pyplots.ai" appears at the top. The chart is
+    positioned in the upper portion of the canvas with significant empty space below.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, value labels, and legend text are all clearly readable at
+          full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; value labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All 12 wedge segments clearly visible with appropriate sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-friendly palette with distinct hues for each month
+      - id: VQ-05
+        name: Layout Balance
+        score: 2
+        max: 5
+        passed: false
+        comment: Chart occupies upper-left area with excessive whitespace below; not
+          centered
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for radial chart without traditional axes (gridlines provide
+          scale)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed gridlines at 75mm and 100mm; legend well formatted
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart type with radial segments
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to segment radius
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: cyclical data, radial gridlines, 12-month
+          pattern'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All values visible within gridline scale (0-100mm)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to months
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "rose-basic · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full variation: low summer values (22-35mm), high autumn/winter
+          (78-92mm)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall pattern is realistic and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in plausible rainfall range (22-92mm)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas, PIL for post-processing)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Creates intermediate file plot_raw.png before final plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of mark_arc, layered composition, and polar coordinates;
+          however post-processing with PIL for cropping detracts from pure Altair
+          solution
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/bokeh.yaml b/plots/rose-basic/metadata/bokeh.yaml
index 1c7eb59d9b..b0e1c58e89 100644
--- a/plots/rose-basic/metadata/bokeh.yaml
+++ b/plots/rose-basic/metadata/bokeh.yaml
@@ -25,3 +25,178 @@ review:
     be more informative
   - Month label for Dec appears slightly inside the wedge rather than outside like
     other labels
+  image_description: The plot displays a rose chart (coxcomb/Nightingale diagram)
+    showing monthly rainfall data. The chart is circular with 12 wedges representing
+    each month (Jan-Dec) arranged clockwise starting from January at the top. The
+    wedge radii are proportional to rainfall values, with December and January showing
+    the largest petals (highest rainfall), while July shows the smallest (lowest rainfall).
+    Colors transition from Python Blue (#306998 and variants) for higher values to
+    Python Yellow (#FFD43B) for lower values (June, July, August visible in yellow).
+    Month labels are positioned around the outside. Concentric dashed gray circles
+    serve as gridlines at 25%, 50%, 75%, and 100% levels. The title "Monthly Rainfall
+    · rose-basic · bokeh · pyplots.ai" appears at the top. Scale labels (25%, 50%,
+    75%, 100%) are positioned on the right side. The overall layout is clean with
+    a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, month labels, and scale labels are readable; title at 28pt,
+          labels at 18pt, scale at 14pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels well-spaced around the
+          chart
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good alpha (0.8); white borders provide
+          good separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-yellow color scheme is colorblind-safe; good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well, centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (N/A for rose chart, but spec mentions radial gridlines
+          - they exist but the scale uses percentage rather than actual rainfall values)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Dashed gridlines are subtle (alpha 0.3), no legend needed for single-category
+          data
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart implementation using wedges
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (months) correctly mapped to angles, values to radii
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Equal-angle wedges, radius proportional to value, radial gridlines,
+          circular arrangement
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible, full range of values displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit legend, color meaning could be clearer
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Monthly Rainfall · rose-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal rainfall pattern with clear variation; demonstrates
+          high winter, low summer rainfall
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall in mm is a realistic, comprehensible scenario for
+          a rose chart
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Rainfall values (15-90 mm) are realistic for temperate climate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → angles → plot → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set (though data is actually deterministic)'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: numpy, bokeh.io, bokeh.models, bokeh.plotting'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, wedge glyph, and proper Bokeh figure configuration;
+          could leverage more interactive features but PNG output limits this
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/highcharts.yaml b/plots/rose-basic/metadata/highcharts.yaml
index f85eac394e..6e5d569eb7 100644
--- a/plots/rose-basic/metadata/highcharts.yaml
+++ b/plots/rose-basic/metadata/highcharts.yaml
@@ -26,3 +26,176 @@ review:
   - Y-axis tick labels could include units (mm) for better clarity
   - Could benefit from Highcharts-specific interactive features in the HTML version
     (tooltip formatting)
+  image_description: 'The plot displays a polar column (rose) chart with 12 wedge-shaped
+    segments representing monthly rainfall data. The segments are arranged clockwise
+    starting from January at the top (12 o''clock position). Each wedge is filled
+    with Python Blue (#306998) color, with radius proportional to the rainfall value.
+    Data labels are displayed on each segment showing values: Jan (78→shown as 95
+    in label area), Dec (92), Nov (85), Oct (68), Sep (42), Aug (22), Jul (18), Jun
+    (25), May (38), Apr (45), Mar (65), Feb (52→shown as 78). The chart has a white
+    background, polygon-style radial gridlines, and includes the correct title format
+    "rose-basic · highcharts · pyplots.ai" with subtitle "Monthly Rainfall (mm)".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and subtitle are readable; month labels and data values are
+          legible but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible; some smaller values (Jul 18, Aug 22)
+          create very small segments but still distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well, good proportions with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Y-axis has "Rainfall (mm)" but no proper axis label with units visible
+          on the radial axis ticks
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle polygon gridlines, legend present and unobtrusive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart using polar column
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (months) on angular axis, values determine radius correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has radial gridlines, categorical labels, single color scheme as
+          per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months displayed with full value range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows "Rainfall" correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "rose-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation with winter high/summer low pattern; good
+          range of values from 18-92mm
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall in mm is a perfect real-world scenario for rose
+          charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (18-92mm) are realistic for monthly rainfall; could show more
+          dramatic variation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as plot.png (correct) but note: startAngle=-15 shifts Jan
+          slightly off 12 o''clock'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts polar chart capability with gridLineInterpolation="polygon",
+          but could leverage more Highcharts-specific features like tooltips or animations
+          in the HTML output
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/letsplot.yaml b/plots/rose-basic/metadata/letsplot.yaml
index e0553ad5a3..3a284870ce 100644
--- a/plots/rose-basic/metadata/letsplot.yaml
+++ b/plots/rose-basic/metadata/letsplot.yaml
@@ -26,3 +26,176 @@ review:
     cleaner appearance
   - The y-axis title appears on the left side which is slightly awkward for polar
     coordinates
+  image_description: The plot displays a rose chart (polar bar chart) showing monthly
+    rainfall distribution. The chart is centered on the canvas with months arranged
+    clockwise starting from January at the top (12 o'clock position). Each wedge represents
+    a month with the radius proportional to rainfall amount. Colors transition smoothly
+    from Python Blue (#306998) for winter months through greens and yellows to Python
+    Yellow (#FFD43B) for December. July has the longest bar (168mm), while February
+    has the shortest (65mm). The title reads "Monthly Rainfall Distribution · rose-basic
+    · letsplot · pyplots.ai" in bold at the top. Radial gridlines are visible at 20mm
+    intervals (0, 20, 40, 60, 80, 100, 120, 140, 160). The y-axis label "Rainfall
+    (mm)" appears on the left side. Month labels are positioned around the outer edge
+    of the chart.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis text at 16pt, all clearly readable. Slightly
+          crowded month labels but acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized with 0.85 alpha, good visibility. Slightly tight
+          spacing between adjacent bars.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Gradient from blue to yellow is colorblind-safe, no red-green dependency
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Rose chart is well-centered, fills ~60% of canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis labeled "Rainfall (mm)" with units, x-axis appropriately blank
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend disabled (appropriate), but radial gridlines could be more
+          subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart using polar coordinates
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Months on angular axis, rainfall on radial axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: equal-angle wedges, radius proportional
+          to value, radial gridlines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the chart bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled (colors are per-month gradient, self-explanatory)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{description} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal rainfall pattern with summer peak (Jul=168mm) and
+          winter trough (Feb=65mm). Good variation across months.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall is a realistic application mentioned in spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Rainfall values 65-168mm are plausible for temperate climate, though
+          slightly high for some months
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: 'Data is deterministic (hardcoded), so np.random.seed is not needed.
+          Minor: no explicit seed statement.'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses lets-plot's ggplot grammar with coord_polar, scale_fill_manual,
+          and theme customization. Could leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/matplotlib.yaml b/plots/rose-basic/metadata/matplotlib.yaml
index 71a8d3006f..192acbb5c6 100644
--- a/plots/rose-basic/metadata/matplotlib.yaml
+++ b/plots/rose-basic/metadata/matplotlib.yaml
@@ -23,3 +23,172 @@ review:
   - No legend or explicit radial axis label explaining that radius = rainfall (mm)
   - Data variation is moderate; more dramatic peaks/valleys would better showcase
     the rose chart format
+  image_description: The plot displays a rose chart (polar bar chart) showing monthly
+    rainfall data in millimeters. The chart uses a blue color (#306998) for all 12
+    segments representing months from January to December. Months are labeled in bold
+    around the perimeter, starting from January at the top (12 o'clock position) and
+    progressing clockwise. The radial axis shows values from 0 to ~200mm with gridlines
+    at 25, 50, 75, 100, 125, 150, 175, and 200. The segment radii correspond to rainfall
+    values, with July showing the highest rainfall (~180mm) and February the lowest
+    (~72mm). The title reads "Monthly Rainfall (mm) · rose-basic · matplotlib · pyplots.ai"
+    at the top. The outer ring has a blue border matching the bar color.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, month labels at 18pt bold, radial ticks at 14pt -
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; month labels well-spaced around the perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars clearly visible with good sizing; white edges provide separation;
+          minor deduction as some smaller segments could be more distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe; good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight deduction for radial tick labels
+          overlapping with "Feb" label position
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Title includes units "(mm)" which serves as the value description
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend present (not strictly needed
+          for single-color rose chart, however the chart could benefit from labeling
+          what the radius represents)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart using polar bar plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Months correctly mapped to angles, rainfall values to radius
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Equal-angle wedges, radius proportional to value, radial gridlines
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible, radial axis extends appropriately to 1.15x
+          max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; single category with clear title
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Monthly Rainfall (mm) · rose-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation pattern (summer peak, winter low); could
+          show more dramatic variation between adjacent months
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall is a realistic and commonly used application for
+          rose charts; values are plausible for a temperate climate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values range from 72-180mm which is realistic; could benefit from
+          more dramatic range to better showcase the visualization
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses hardcoded deterministic data (no random seed needed), but lacks
+          np.random.seed() even though numpy is imported
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/plotly.yaml b/plots/rose-basic/metadata/plotly.yaml
index f418a2e027..b94d65a735 100644
--- a/plots/rose-basic/metadata/plotly.yaml
+++ b/plots/rose-basic/metadata/plotly.yaml
@@ -26,3 +26,178 @@ review:
   - Data range is somewhat narrow (35-85mm); more extreme variation would better showcase
     the plot type
   - Could add a subtitle or annotation explaining the data context
+  image_description: The plot displays a rose/coxcomb chart showing monthly rainfall
+    data. It features 12 wedge-shaped segments arranged in a circle, one for each
+    month (Jan through Dec), starting from the top at 12 o'clock position and proceeding
+    clockwise. The segments' radii correspond to rainfall values in mm. The color
+    scheme transitions from yellow (Python Yellow, ~#FFD43B) for lower values to blue
+    (Python Blue, ~#306998) for higher values. July has the smallest radius (~35mm,
+    yellow), while December has the largest (~85mm, blue). Radial gridlines at 0,
+    20, 40, 60, and 80 mm help gauge values. The title "rose-basic · plotly · pyplots.ai"
+    appears centered at the top. Month labels are positioned around the outer edge
+    of the chart. The overall layout is clean with a white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, month labels are easily readable, radial
+          axis labels are legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels well-spaced around the
+          perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good sizing; white borders between
+          segments aid distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Yellow-to-blue gradient is colorblind-friendly (avoids red-green);
+          good choice
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions; chart utilizes canvas well though slightly more
+          centered vertically than ideal
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial axis includes unit "mm" suffix; month labels are descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle radial gridlines present; no legend needed for single-series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart using barpolar
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories (months) on angular axis, values (rainfall) as radius
+          - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Equal-angle wedges, radius proportional to value, circular arrangement
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, radial axis shows full range (0-80+ mm)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color scale implicit and appropriate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "rose-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation in rainfall; demonstrates high winter/low
+          summer pattern; could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall data is a classic rose chart application; values
+          are realistic for temperate climate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 35-85mm are plausible; perhaps slightly narrow range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded values); there is no random seed
+          statement, although none is needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported, and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API (go.Barpolar)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses barpolar for rose chart, hover template for interactivity, and
+          exports HTML version; could leverage more plotly-specific features like
+          animations or custom hover data
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/plotnine.yaml b/plots/rose-basic/metadata/plotnine.yaml
index ce9c336603..fca156d1bf 100644
--- a/plots/rose-basic/metadata/plotnine.yaml
+++ b/plots/rose-basic/metadata/plotnine.yaml
@@ -26,3 +26,180 @@ review:
   - Figure size is 12x12 at 300 DPI (3600x3600) which is acceptable but the spec prefers
     16x9 for landscape or explicit 3600x3600 for square
   - Could use plotnine built-in theme functions more extensively
+  image_description: The plot displays a rose chart (coxcomb diagram) showing monthly
+    rainfall data in millimeters. The chart features 12 wedge-shaped segments arranged
+    in a clockwise direction starting from January at the top (12 o'clock position).
+    Each wedge extends from the center with a radius proportional to the rainfall
+    value. The color scheme transitions from blue (lower values like March/April/May
+    around 48-55mm) through olive/green tones to yellow (higher values like November
+    at 95mm). Radial gridlines are visible as dashed circles at 20, 40, 60, 80, and
+    100mm intervals. Spoke lines separate the months. Month labels (Jan, Feb, etc.)
+    are positioned outside the chart in bold black text. Value labels (20, 40, 60,
+    80, 100) appear along the top spoke. The title "Monthly Rainfall (mm) · rose-basic
+    · plotnine · pyplots.ai" is centered at the top. The plot uses a square 1:1 aspect
+    ratio with a clean white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and month labels are clearly readable, value labels slightly
+          small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedges are well-sized and clearly visible with appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-friendly, though not using
+          a standard accessible palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of square canvas, plot is well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Title includes units "(mm)" - appropriate for rose chart without
+          traditional axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and helpful, but legend is hidden (acceptable for
+          this single-series chart)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/coxcomb chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Radius correctly proportional to value (not area)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has radial gridlines, starts at 12 o'clock, clockwise arrangement;
+          small gaps between wedges add polish
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 months visible with appropriate scale
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, legend appropriately hidden)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Monthly Rainfall (mm) · rose-basic · plotnine ·
+          pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation across months with clear seasonal patterns; could
+          benefit from more extreme contrasts
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall for temperate climate is an excellent, real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values (48-95mm) are realistic for monthly rainfall
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot construction → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data (no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but figure_size is 12x12 (should be 16x9, or 12x12
+          at a dpi that achieves the target resolution)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics with geom_polygon and layered
+          construction, but doesn't leverage coord_polar() since plotnine lacks it
+          - manual polygon construction is a workaround
+  verdict: APPROVED
diff --git a/plots/rose-basic/metadata/seaborn.yaml b/plots/rose-basic/metadata/seaborn.yaml
index 5425777488..43f4f8b966 100644
--- a/plots/rose-basic/metadata/seaborn.yaml
+++ b/plots/rose-basic/metadata/seaborn.yaml
@@ -28,3 +28,181 @@ review:
     for single-series data)
   - Corner whitespace in square format is unavoidable but slightly reduces canvas
     efficiency
+  image_description: The plot displays a rose/Nightingale chart showing monthly rainfall
+    data in millimeters. The chart is circular with 12 segments (one per month), starting
+    at January at the 12 o'clock position and proceeding clockwise. Each segment uses
+    a gradient from the "Blues" seaborn palette - lighter blues for earlier months
+    transitioning to darker blues for later months. The bar heights (radii) are proportional
+    to rainfall values, with June showing the highest rainfall (168mm) and February
+    the lowest (72mm). Each bar has a value label positioned just outside it. Radial
+    gridlines are shown at 50mm, 100mm, and 150mm with subtle gray dashed styling.
+    The title reads "Monthly Rainfall (mm) · rose-basic · seaborn · pyplots.ai" at
+    the top. The layout uses a square 12x12 inch format appropriate for radial plots.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt, month labels are 20pt bold, gridline labels 14pt,
+          value labels 14pt bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; month labels and value labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths and heights are well-proportioned for the 12 categories
+          with good alpha (0.9)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues sequential palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good square format for radial plot, but some wasted space in corners
+          inherent to circular plots
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial labels include units ("50 mm", "100 mm", "150 mm")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid alpha at 0.3 is appropriate, but no legend present to indicate
+          what the color gradient represents
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rose/Nightingale chart with equal-angle wedges and radius
+          proportional to value
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on angular axis, values on radial axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: equal-angle wedges, radius proportional
+          to value, radial gridlines, start at top'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis extends to max_val * 1.15, showing all data with room for
+          labels
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (single data series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "Monthly Rainfall (mm) · rose-basic · seaborn
+          · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows seasonal variation with clear peaks (summer) and troughs (winter),
+          but could show more dramatic variation to better demonstrate rose chart
+          capabilities
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly rainfall is a classic application mentioned in the spec;
+          values represent a plausible temperate climate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Rainfall values (72-168mm) are realistic monthly ranges
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no random seed comment; minor
+          deduction
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib/seaborn APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses seaborn color palette (sns.color_palette) but the actual plotting
+          uses matplotlib's polar bar chart. No seaborn plot functions like sns.barplot
+          are used (polar bars aren't natively supported by seaborn).
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/altair.yaml b/plots/rug-basic/metadata/altair.yaml
index 7840761035..27dbc8a3ba 100644
--- a/plots/rug-basic/metadata/altair.yaml
+++ b/plots/rug-basic/metadata/altair.yaml
@@ -25,3 +25,169 @@ review:
     Time (ms) for a realistic scenario)
   - Large empty space above rug marks reduces canvas utilization, though inherent
     to standalone rug plots
+  image_description: 'The plot displays a basic rug plot with vertical blue tick marks
+    (#306998 color with 0.6 opacity) positioned at the bottom of the chart along the
+    x-axis. The x-axis is labeled "Measurement Value" and ranges from 6 to 80. The
+    tick marks clearly demonstrate a bimodal distribution: a dense cluster of marks
+    around values 14-32 and a sparser cluster around 45-68, with a visible gap between
+    the two clusters (around 36-42). The title "rug-basic · altair · pyplots.ai" is
+    displayed at the top in appropriately sized font. The plot uses semi-transparency
+    which helps distinguish overlapping tick marks in dense regions.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 22pt, tick labels 18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Tick marks clearly visible with strokeWidth=3 and opacity=0.6
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Large empty space above rug marks, though inherent to standalone
+          rug plots
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Measurement Value" is descriptive but lacks units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.3), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot using Altair's mark_rule
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows tick marks with alpha for overlap, positioned at bottom
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Domain [5, 80] shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, no legend needed for single-variable rug
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "rug-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution excellently demonstrates clustering patterns
+          AND gaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Generic "Measurement Value" is plausible but not a specific real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible numeric values (25 and 55 centers with realistic spread)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (altair, numpy, pandas) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good declarative approach with mark_rule and y/y2 encoding for tick
+          height
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/highcharts.yaml b/plots/rug-basic/metadata/highcharts.yaml
index ad0cdf0a9a..b20b058447 100644
--- a/plots/rug-basic/metadata/highcharts.yaml
+++ b/plots/rug-basic/metadata/highcharts.yaml
@@ -26,3 +26,182 @@ review:
     much shorter ticks positioned at the axis edge
   - The tick line width (5px) is adequate but could be slightly bolder for better
     visibility of sparse outliers
+  image_description: 'The plot displays a basic rug plot showing API response times
+    in milliseconds. The visualization consists of vertical blue lines (tick marks)
+    arranged along the x-axis from 0 to 600ms. There are three distinct clusters of
+    tick marks: a dense cluster around 50ms (fast responses), another cluster around
+    100-150ms (medium responses), and a sparser cluster around 250ms (slow responses).
+    Three isolated outlier ticks appear at approximately 380ms, 420ms, and 510ms.
+    The title "rug-basic · highcharts · pyplots.ai" appears at the top in bold, with
+    a subtitle "API Response Times (ms)" below it. The x-axis is labeled "Response
+    Time (ms)" with tick marks at 50ms intervals. The tick marks use a semi-transparent
+    blue color (#306998 with 0.6 alpha), and no y-axis labels or grid lines are shown,
+    creating a clean, focused visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title (72px), subtitle (48px), axis labels (48px), and tick labels
+          (36px) are all clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Rug ticks are visible with good alpha transparency (0.6) showing
+          clustering patterns clearly; tick marks could be slightly thicker for better
+          visibility at the sparse end
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe Python blue (#306998); no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas space; tick marks span most of the vertical
+          plot area with tight y-axis max (1.2)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'X-axis has descriptive label with units: "Response Time (ms)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid lines (appropriate for rug plot), no legend needed; however,
+          no baseline emphasis to anchor the ticks visually
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot implementation with vertical tick marks along axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows tick marks at exact data locations, uses transparency for overlapping
+          points, consistent tick height
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (0-600) shows all data points including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "rug-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows clustering, gaps, and outliers well; demonstrates the key
+          value of rug plots. Minor: could have one more distinct cluster to show
+          even more variation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: API response times is an excellent real-world scenario where rug
+          plots shine
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 10-510ms are realistic; outliers at 380-510ms are
+          plausible for slow requests
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → chart config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also saves plot.html (minor, but
+          spec doesn't require HTML)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses LineSeries to create vertical tick marks; creative solution
+          for Highcharts which lacks native rug plot support. Uses Selenium for PNG
+          export as required. Could leverage more Highcharts features like animation
+          or hover states.
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/letsplot.yaml b/plots/rug-basic/metadata/letsplot.yaml
index 3a813babce..e09bf91ae8 100644
--- a/plots/rug-basic/metadata/letsplot.yaml
+++ b/plots/rug-basic/metadata/letsplot.yaml
@@ -24,3 +24,172 @@ review:
     maintaining the small relative to plot requirement
   - Grid lines could be more subtle (current alpha appears around 0.5, spec prefers
     0.2-0.4)
+  image_description: The plot displays a basic rug plot with a density curve overlay.
+    The visualization uses a blue color (#306998) for both elements. The density curve
+    shows a bimodal distribution with a primary peak around 120ms and a secondary
+    peak around 250ms, followed by a long right tail extending to ~800ms. Below the
+    density curve, small vertical tick marks (the rug) are positioned along the x-axis
+    at y=0 to ~0.0004, showing the exact location of each data point. The clustering
+    is clearly visible in the rug marks - dense in the 80-180ms region, moderately
+    dense around 200-300ms, sparse around 350-500ms, and a few isolated outliers extending
+    to ~780ms. Title reads "rug-basic · letsplot · pyplots.ai" at top. X-axis labeled
+    "Response Time (ms)", Y-axis labeled "Density". Light gray gridlines on minimal
+    theme background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at proper
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Rug marks visible but quite small/thin; density curve clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Response Time (ms)" and "Density"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present but somewhat dominant; could be more subtle
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot with tick marks along x-axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis position
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows rug marks with density complement as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed/present for single-series plot (N/A but docking
+          2)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "rug-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clusters, gaps, and outliers as spec mentions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times in milliseconds is a plausible real scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 80-780ms are realistic for API response times
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but uses path="." instead of direct filename
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_segment for rug marks (creative workaround since letsplot
+          lacks geom_rug), ggsize, theme customization, but no letsplot-specific interactive
+          features leveraged
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/matplotlib.yaml b/plots/rug-basic/metadata/matplotlib.yaml
index b2249a4123..4205d83a1c 100644
--- a/plots/rug-basic/metadata/matplotlib.yaml
+++ b/plots/rug-basic/metadata/matplotlib.yaml
@@ -25,3 +25,174 @@ review:
     context)
   - Could demonstrate outlier detection capability mentioned in the spec
   - Basic vlines implementation rather than matplotlib more specialized options
+  image_description: 'The plot displays a basic rug plot with vertical blue tick marks
+    (#306998) along a horizontal axis ranging from 10 to 90. The title "rug-basic
+    · matplotlib · pyplots.ai" appears at the top in black text. Three clusters of
+    tick marks are visible: a dense cluster around value 25 (labeled "Dense cluster
+    (n=50)"), a wider spread cluster around value 55 (labeled "Wider spread (n=35)"),
+    and a small group around value 75 (labeled "Small group (n=15)"). The x-axis is
+    labeled "Measurement Value". The y-axis is hidden, with only the bottom spine
+    visible. Annotations in blue text above each cluster explain the data distribution.
+    The layout is clean with good use of whitespace.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, xlabel at 20pt, ticks at 16pt, annotations at 16pt
+          - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, annotations well-spaced above clusters
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Tick marks visible with good linewidth=3, alpha=0.7 handles overlap
+          well; slightly tall ticks relative to typical rug plots
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998), no color differentiation needed, good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Measurement Value" is descriptive but lacks units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (appropriate for rug plot), no legend needed, but bottom
+          spine could be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot using vertical tick marks
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows individual data points as tick marks, demonstrates clustering
+          and gaps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis (5-90) shows all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type, annotations serve as descriptive labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "rug-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clustering, gaps, and different densities; could also show
+          potential outliers as mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Trimodal measurement data is plausible but somewhat abstract; a more
+          concrete scenario (e.g., response times, measurements) would be stronger
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 15-80 are sensible for generic measurements
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ax.vlines() which is appropriate but basic; could leverage matplotlib's
+          EventCollection or rugplot-specific methods for a more idiomatic implementation
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/plotly.yaml b/plots/rug-basic/metadata/plotly.yaml
index 790f96b7aa..07caac015e 100644
--- a/plots/rug-basic/metadata/plotly.yaml
+++ b/plots/rug-basic/metadata/plotly.yaml
@@ -25,3 +25,176 @@ review:
   - The rug ticks could be slightly taller/more prominent given the large canvas size
   - Could leverage Plotly distinctive features more (e.g., combine with histogram/kde,
     use animations, or add more interactive features)
+  image_description: The plot displays a rug plot with vertical tick marks (small
+    blue lines) arranged along a horizontal axis at y=0. The ticks use a steel blue
+    color (#306998) with semi-transparency (opacity 0.6). The x-axis is labeled "Response
+    Time (ms)" with values ranging from approximately 10 to 75. The title "rug-basic
+    · plotly · pyplots.ai" is centered at the top in large black text. A subtle gray
+    grid is visible in the background. The data clearly shows a bimodal distribution
+    with dense clustering around 20-35ms and 45-70ms, with a visible gap around 35-45ms
+    containing only two isolated points. The layout is clean with a white background
+    (plotly_white template).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Tick marks are visible with good size (20px), though could be slightly
+          taller for better visibility at this canvas size
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme with good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though the rug plot itself occupies a narrow
+          horizontal band with significant empty space above and below (appropriate
+          for rug plots but creates visual imbalance)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1) and no legend is needed; however, the
+          y-axis grid lines are unnecessary for a rug plot and add visual noise
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot with tick marks along axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to x-axis positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows clustering, gaps, and individual data points as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, showlegend=False correctly set
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "rug-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Excellent bimodal distribution showing clustering and gap; could
+          show overlapping points more distinctly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times in milliseconds is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 10-75ms are plausible for response times, though the gap region
+          with only 2 points feels slightly artificial
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Scatter with line-ns marker symbol creatively for rug effect,
+          but doesn't leverage Plotly's interactive hover well (hovertemplate is good
+          but basic); could use px.strip or combine with histogram/kde for richer
+          visualization
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/plotnine.yaml b/plots/rug-basic/metadata/plotnine.yaml
index 1e293d119d..98115a5a61 100644
--- a/plots/rug-basic/metadata/plotnine.yaml
+++ b/plots/rug-basic/metadata/plotnine.yaml
@@ -25,3 +25,178 @@ review:
     the high-resolution canvas
   - Large empty space above the rug is inherent to standalone rug plots but creates
     an unbalanced visual
+  image_description: 'The plot shows a rug plot displaying response time data as vertical
+    tick marks along the bottom x-axis. The title "rug-basic · plotnine · pyplots.ai"
+    is displayed at the top in black text. The x-axis is labeled "Response Time (ms)"
+    and shows values from approximately 100 to 850 ms. Blue tick marks (#306998 color)
+    are positioned at the bottom of the plot area, clearly showing three clusters:
+    a dense cluster around 100-200ms (fast responses), a moderate cluster around 250-400ms
+    (medium responses), and a sparse cluster around 400-550ms (slow responses), plus
+    4 isolated outliers between 600-850ms. The plot uses a minimal theme with subtle
+    vertical grid lines and no y-axis elements, which is appropriate for a pure rug
+    plot. The overall layout is clean with good whitespace balance.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis label at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Rug ticks are visible with alpha=0.7 and size=1.5, but could be slightly
+          more prominent for the high-resolution canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good horizontal use but the plot area is mostly empty space above
+          the rug (by design for rug plots, but could benefit from slightly taller
+          ticks or a complementary element)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle vertical grid lines, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot using geom_rug()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times correctly mapped to x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows tick marks at exact data positions, uses alpha for overlapping
+          points, positioned on bottom axis
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible from ~100ms to ~850ms
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "rug-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clustering (3 distinct groups), gaps between clusters, and
+          outliers - demonstrates all key rug plot features well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times are a perfect real-world use case with plausible clustering
+          patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for response times (100-850ms), though the outliers
+          could be slightly more extreme to better demonstrate outlier detection
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics with geom_rug(), theme_minimal(),
+          and theme customization. Good use of element_blank() to hide y-axis elements.
+          Could have demonstrated additional plotnine features like faceting or combining
+          with geom_density().
+  verdict: APPROVED
diff --git a/plots/rug-basic/metadata/seaborn.yaml b/plots/rug-basic/metadata/seaborn.yaml
index 6889ef5696..a35b8497bc 100644
--- a/plots/rug-basic/metadata/seaborn.yaml
+++ b/plots/rug-basic/metadata/seaborn.yaml
@@ -22,3 +22,172 @@ review:
   weaknesses:
   - Could complement the rug plot with a KDE or histogram overlay to better demonstrate
     spec note that rug plots work best as a complement to other plots
+  image_description: 'The plot shows a basic rug plot with blue vertical tick marks
+    (#306998 color) along the bottom of the canvas. The x-axis is labeled "Response
+    Time (ms)" ranging from 0 to 600. The title reads "rug-basic · seaborn · pyplots.ai"
+    at the top. The rug marks clearly show a bimodal distribution: a dense cluster
+    of ~80 marks around 100-200ms (fast responses), a sparser cluster of ~40 marks
+    around 300-400ms (slow responses), and a few isolated outlier marks near 50ms,
+    520ms, and 550ms. A subtle dashed gray grid (alpha ~0.3) appears on the x-axis
+    only. The y-axis has no labels or ticks, providing clean whitespace above the
+    rug marks.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, x-label 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Rug marks clearly visible with good height (0.15) and line width
+          (2.5), slight density in clusters but alpha=0.7 helps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good 16:9 layout, rug marks positioned at bottom with clean space
+          above
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is appropriate, and no legend is needed for a single-variable
+          rug plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rug plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Continuous values correctly mapped to x-axis tick marks
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows clustering, gaps, outliers, and uses alpha for overlapping
+          marks
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range 0-600ms shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-variable plot, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "rug-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Bimodal distribution shows clustering AND gaps, plus outliers at
+          edges
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Response times with fast/slow modes is a realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 50-550ms are realistic for server response times
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current seaborn API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.rugplot which is the correct function, but this is basic
+          library usage rather than distinctive seaborn features
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/altair.yaml b/plots/sankey-basic/metadata/altair.yaml
index 3f2bcbb58b..48c3336439 100644
--- a/plots/sankey-basic/metadata/altair.yaml
+++ b/plots/sankey-basic/metadata/altair.yaml
@@ -26,3 +26,170 @@ review:
     wider color variation would improve distinguishability
   - Code complexity is high due to manual polygon generation; this is necessary since
     Altair lacks native Sankey support, but adds maintenance burden
+  image_description: 'The plot displays a Sankey diagram showing energy flow from
+    four source nodes (Renewable, Nuclear, Gas, Coal) positioned on the left side
+    to four target sector nodes (Transport, Industrial, Commercial, Residential) on
+    the right. The flows are rendered as smooth S-curved bands with semi-transparent
+    fill (opacity ~0.55), colored by source category. Source nodes use a blue palette
+    (Coal: #306998, Gas: #4A8BC6, Nuclear: #2D5986) with Renewable in yellow (#FFD43B).
+    Target nodes are colored distinctly (teal shades for Residential/Commercial, coral/red
+    for Industrial/Transport). Node labels are positioned adjacent to their respective
+    rectangles in bold black text. The title "sankey-basic · altair · pyplots.ai"
+    appears centered at top with subtitle "Energy Flow from Sources to Sectors". A
+    legend in the bottom-right corner shows the "Energy Source" color mapping.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, labels at 20pt bold, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels positioned outside nodes
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flow bands well-sized, nodes visible with stroke, slight deduction
+          as some crossing flows can be hard to trace
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color distinction overall, though blue shades for Coal/Gas/Nuclear
+          are somewhat similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, nodes well-spaced, balanced margins
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed in bottom-right, no grid needed for Sankey
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram with nodes and flow links
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source→Target flows with proportional widths correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: categorical nodes, flow values, distinct
+          colors'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All flows visible, node heights proportional to totals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows energy source colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "sankey-basic · altair · pyplots.ai" used
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple sources flowing to multiple targets with varying magnitudes;
+          slight deduction as no extremely dominant or extremely minor flows shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy flow from sources to sectors is a classic, realistic Sankey
+          use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (10-45 units) are plausible; could benefit from showing more
+          extreme differences
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Linear flow structure, but complex manual polygon calculation; no
+          functions/classes used though
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded flows), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" but also saves "plot.html" which is fine
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's declarative layering, mark_line with filled
+          polygons, detailed encoding with tooltips, and legend configuration
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/bokeh.yaml b/plots/sankey-basic/metadata/bokeh.yaml
index c12ae6cff9..5819df36d5 100644
--- a/plots/sankey-basic/metadata/bokeh.yaml
+++ b/plots/sankey-basic/metadata/bokeh.yaml
@@ -24,3 +24,174 @@ review:
   - Missing legend to explain the color scheme for viewers not reading labels
   - Could leverage Bokeh interactive features (hover tooltips showing flow details)
   - Font sizes could be slightly larger for optimal readability at full resolution
+  image_description: 'The plot displays a Sankey diagram showing energy flow from
+    5 source nodes on the left (Solar 11 TWh, Hydro 15 TWh, Nuclear 30 TWh, Gas 65
+    TWh, Coal 35 TWh) to 3 target nodes on the right (Commercial 45 TWh, Residential
+    53 TWh, Industrial 58 TWh). The flows are rendered as smooth bezier curves connecting
+    sources to destinations, with widths proportional to flow values. Colors used:
+    orange for Solar, light blue for Hydro, purple for Nuclear, yellow/gold for Gas,
+    and Python blue (#306998) for Coal. Target nodes are colored teal (Commercial),
+    red (Residential), and dark blue-grey (Industrial). The title "Energy Flow · sankey-basic
+    · bokeh · pyplots.ai" is centered at the top. The background is a light grey (#FAFAFA).
+    All labels include the node name and total value in TWh. Flow opacity is set at
+    0.5, allowing overlapping flows to be distinguished.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable at 22pt for labels and 32pt for title. Slightly
+          small for the 4800x2700 canvas but still clear.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; labels are well-positioned outside nodes
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Flows are clearly visible with appropriate widths proportional to
+          values; 0.5 alpha allows overlapping flows to be distinguished
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Good colorblind-safe palette with distinct hues (orange, blue, purple,
+          yellow, teal, red, dark grey)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with balanced margins; plot fills approximately
+          70% of canvas width
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend provided; while node colors are labeled, a legend explaining
+          the color scheme would improve clarity
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram with nodes and flows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source, target, and value correctly mapped to visual elements
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: flows with proportional widths, distinct
+          source colors, node labels, no circular flows'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; flow widths accurately represent relative values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Node labels accurately show names and values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Energy Flow · sankey-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple sources flowing to multiple targets with varying magnitudes;
+          demonstrates flow crossing and stacking
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy flow from sources to sectors is a classic, realistic Sankey
+          use case with plausible TWh values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for energy flows; total sources (156 TWh) equals
+          total targets (156 TWh) as expected
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → calculations → plotting → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded), but numpy is imported for bezier
+          calculations without random operations
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's figure, patch for bezier flows, quad for nodes, and
+          Label for annotations. However, could better leverage ColumnDataSource for
+          data management or hover tooltips for interactivity.
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/highcharts.yaml b/plots/sankey-basic/metadata/highcharts.yaml
index 380f96dba0..75943b5c63 100644
--- a/plots/sankey-basic/metadata/highcharts.yaml
+++ b/plots/sankey-basic/metadata/highcharts.yaml
@@ -23,3 +23,172 @@ review:
   weaknesses:
   - numpy imported but only used for random seed (could use deterministic data instead)
   - Library version shown as unknown in header comment
+  image_description: 'The plot displays a Sankey diagram showing U.S. energy flow.
+    On the left side are five energy sources (Coal, Natural Gas, Nuclear, Petroleum,
+    Renewable) represented as colored vertical bars with white text labels. The middle
+    shows two intermediate nodes (Electricity and Transportation). On the right are
+    four end-use sectors (Industrial, Residential, Commercial). Flows are represented
+    by curved bands connecting nodes, with widths proportional to energy values in
+    TWh. The title "sankey-basic · highcharts · pyplots.ai" appears at the top with
+    a subtitle "U.S. Energy Flow (values in TWh)". Colors used: dark blue (Coal),
+    olive/gold (Natural Gas), purple (Nuclear), cyan (Petroleum), dark green (Renewable),
+    brown (Electricity), orange (Transportation), plus various shades for end-use
+    sectors. The flow bands have 50% opacity allowing overlap visualization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All node labels are readable with white text on dark backgrounds
+          and text outline. Title and subtitle are clear. Minor: some label positioning
+          could be slightly better optimized.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlaps; labels are positioned outside flow bands
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Flow bands are well-sized with appropriate opacity (0.5) for crossing
+          flows
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with distinct dark tones; no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with appropriate margins; minor whitespace
+          at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled; nodes are self-labeled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source → Target → Value correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows flows with proportional widths, distinct node colors, link
+          opacity
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All flows visible and proportionally represented
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sankey; nodes are self-labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sankey-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple sources, intermediate transformation (Electricity),
+          and multiple end uses; demonstrates crossing flows well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: U.S. energy flow with plausible TWh
+          values'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for energy sector; some values could be more
+          precisely calibrated to actual data
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: numpy imported but only used for seed (minor)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Outputs plot.png and plot.html (correct)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of Highcharts Sankey module with linkColorMode, curveFactor,
+          nodePadding, custom node colors, and tooltip formatting
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/letsplot.yaml b/plots/sankey-basic/metadata/letsplot.yaml
index b25b7fdbeb..ffde977734 100644
--- a/plots/sankey-basic/metadata/letsplot.yaml
+++ b/plots/sankey-basic/metadata/letsplot.yaml
@@ -23,3 +23,182 @@ review:
   weaknesses:
   - Node labels could be slightly larger for better readability at full resolution
   - Some minor whitespace imbalance (more space at bottom due to legend placement)
+  image_description: 'The plot displays a Sankey diagram visualizing energy flow from
+    four sources (Coal, Natural Gas, Nuclear, Renewable) on the left to three consumption
+    sectors (Industrial, Residential, Commercial) on the right. Each source node has
+    a dark navy rectangular bar with white labels showing the source name and total
+    TWh (e.g., "Coal (36 TWh)"). The flows are smooth bezier curves colored by energy
+    source: Coal is dark gray (#4A4A4A), Natural Gas is blue (#306998), Nuclear is
+    purple (#9B59B6), and Renewable is green (#27AE60). Flows have ~65% opacity with
+    thin white edges, allowing overlapping flows to remain distinguishable. Target
+    nodes on the right display sector names with totals (e.g., "Industrial (72 TWh)").
+    The title "Energy Flow · sankey-basic · letsplot · pyplots.ai" appears at the
+    top left in bold. A horizontal legend at the bottom shows all four energy source
+    colors. The plot fills approximately 70% of the canvas with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold (~30pt), node labels are clear and readable
+          (~14pt scaled)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are positioned outside the flow area
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flows are clearly visible with good alpha (0.65); white edges help
+          distinguish overlapping flows
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that work well for colorblind users (gray, blue,
+          purple, green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization (~70%), slight imbalance with more whitespace
+          at top/bottom than needed
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sankey diagrams (no traditional axes) - deducting as per
+          criteria
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for Sankey), legend well-placed at bottom
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source→target flows correctly mapped with proportional widths
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: flow visualization, node labels, distinct
+          colors, link opacity'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All flows visible, no data cut off
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four energy sources
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "Energy Flow · sankey-basic · letsplot
+          · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple sources, multiple targets, varying flow magnitudes,
+          crossing flows
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible energy distribution scenario with realistic proportions
+          (gas dominates, coal to industrial)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: TWh values are realistic for a regional energy system
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Deterministic data (hardcoded values), but no explicit random seed
+          statement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar effectively with geom_polygon, geom_rect, geom_text,
+          theme_minimal, but Sankey is manually constructed rather than using a native
+          Sankey geom (lets-plot lacks native Sankey support, so this manual approach
+          is appropriate)
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/matplotlib.yaml b/plots/sankey-basic/metadata/matplotlib.yaml
index 83f365b06f..3bb00c07e5 100644
--- a/plots/sankey-basic/metadata/matplotlib.yaml
+++ b/plots/sankey-basic/metadata/matplotlib.yaml
@@ -27,3 +27,181 @@ review:
   - Missing explicit legend to explain that blue represents primary energy sources
     and yellow represents end-use distribution
   - The assert statement adds slight complexity that could be replaced with a comment
+  image_description: "The plot displays a Sankey diagram illustrating national energy\
+    \ flow. The diagram uses two distinct colors: blue (steel blue #306998) for the\
+    \ primary energy sources and losses, and yellow/gold (#FFD43B) for the end-use\
+    \ distribution. \n\n**Left side (blue flows):** Four input sources flow into a\
+    \ central hub - Coal (120 TWh) from the bottom, Natural Gas (90 TWh) from the\
+    \ left, Nuclear (60 TWh) from the top, and Renewables (30 TWh) from the top-left.\
+    \ A Losses branch (100 TWh) exits downward from the hub.\n\n**Right side (yellow\
+    \ flows):** The remaining 200 TWh flows to four end-use sectors - Industrial (80\
+    \ TWh) upward, Transport (20 TWh) upper-right, Commercial (45 TWh) rightward,\
+    \ and Residential (55 TWh) downward.\n\n**Title:** \"National Energy Flow · sankey-basic\
+    \ · matplotlib · pyplots.ai\" at the top in bold.\n**Subtitle:** Italicized explanatory\
+    \ text at the bottom describing the flow pattern.\n**Labels:** Each flow is labeled\
+    \ with its name and TWh value in bold 18pt font.\n**Layout:** Clean white background,\
+    \ axes are turned off, good use of canvas space."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All labels clearly readable with 18pt bold font, title at 26pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flows are appropriately sized with good proportions, alpha=0.75 provides
+          visibility while allowing overlap perception
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe and provide excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, diagram fills most of the space, minor asymmetry
+          on left side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sankey, but all nodes labeled with descriptive names and
+          units (TWh)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid needed for Sankey (correct), but could benefit from a legend
+          explaining blue=sources vs yellow=end-use
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram implementation using matplotlib.sankey
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Flows correctly show source→target with widths proportional to values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows multiple sources, transformation/losses stage, and multiple
+          destinations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, energy balance verified (300 = 200 + 100)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are accurate but no explicit legend for color meaning
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "National Energy Flow · sankey-basic · matplotlib
+          · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple sources, losses, and multiple destinations; demonstrates
+          flow proportionality well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy flow is a classic Sankey application with realistic TWh values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for national energy data (300 TWh total)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: 'Deterministic data (no random), so no explicit seed is needed; minor:
+          assert statement adds slight complexity'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and matplotlib.sankey imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib.sankey API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib.sankey module correctly with multiple connected diagrams,
+          custom pathlengths, and orientations. Could leverage more advanced features
+          like trunk customization.
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/plotly.yaml b/plots/sankey-basic/metadata/plotly.yaml
index 4a7f252696..7da7c67fe9 100644
--- a/plots/sankey-basic/metadata/plotly.yaml
+++ b/plots/sankey-basic/metadata/plotly.yaml
@@ -23,3 +23,187 @@ review:
   - Could include flow value labels on hover or annotations to show exact TWh values
   - Transportation sector only receives flows from Gas and Renewables, missing Coal
     and Nuclear contributions
+  image_description: "The plot displays a Sankey diagram showing energy distribution\
+    \ from four source nodes (Natural Gas, Nuclear, Renewables, Coal) on the left\
+    \ to four target nodes (Residential, Commercial, Transportation, Industrial) on\
+    \ the right. \n\n**Colors**: Source nodes use distinct colors - dark blue for\
+    \ Coal, light blue for Natural Gas, yellow for Nuclear, and green for Renewables.\
+    \ Target nodes use muted grayish-blue tones. Flow links are semi-transparent (alpha\
+    \ ~0.5) and colored to match their source.\n\n**Title**: \"Energy Distribution\
+    \ · sankey-basic · plotly · pyplots.ai\" centered at top in appropriate font size.\n\
+    \n**Layout**: The diagram fills the canvas well with adequate margins. Node labels\
+    \ are positioned clearly next to their respective nodes. Flow widths are proportional\
+    \ to values, making it easy to identify major pathways (Natural Gas → Residential\
+    \ being the largest flow)."
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title at appropriate size (~36pt), node
+          labels at ~22pt, all crisp and legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; node labels are well-positioned and do not interfere
+          with flows
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Flow links appropriately sized with 0.5 opacity allowing visibility
+          where flows cross
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Color scheme uses blue, yellow, green which are colorblind-friendly;
+          distinct hues for different sources
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization; diagram fills ~70% of space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sankey diagrams (no axes) - applying 2 points for node labels
+          which are descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean white template, no distracting elements; node colors serve
+          as implicit legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram showing flows between nodes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source-target-value correctly mapped; link widths proportional to
+          flow values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors for sources, link opacity
+          for crossing flows, clear node labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All flows visible and readable
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Node labels accurately describe each category
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "Energy Distribution · sankey-basic · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows flows from 4 sources to 4 targets with varying magnitudes;
+          demonstrates multiple flow paths and crossings; minor: could show more dramatic
+          variation in flow sizes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy distribution from sources (Coal, Gas, Nuclear, Renewables)
+          to sectors (Residential, Commercial, Industrial, Transportation) is a classic,
+          comprehensible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Values in TWh are sensible for energy; Natural Gas having highest
+          residential flow is realistic; minor: Transportation sector could have more
+          diverse energy sources'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → colors → figure creation
+          → layout → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values used)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only `plotly.graph_objects` imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API (go.Sankey)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Plotly's interactive Sankey with go.Sankey, custom node padding/thickness,
+          link transparency, HTML output for interactivity
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/plotnine.yaml b/plots/sankey-basic/metadata/plotnine.yaml
index 0d6b08303d..c3496a4279 100644
--- a/plots/sankey-basic/metadata/plotnine.yaml
+++ b/plots/sankey-basic/metadata/plotnine.yaml
@@ -23,3 +23,183 @@ review:
   weaknesses:
   - No axis labels (though Sources and Sectors annotations partially compensate)
   - Some flow value labels could be positioned more consistently
+  image_description: 'The plot displays a Sankey diagram showing energy flow from
+    sources to sectors. On the left side are four source nodes: Coal (dark blue),
+    Gas (yellow), Nuclear (teal), and Renewables (green). On the right side are three
+    target/sector nodes: Industrial (red), Commercial (purple), and Residential (orange).
+    Curved flow bands connect sources to targets, with widths proportional to flow
+    values. Flow values (35, 25, 20, 18, 15, 15, 12, 12) are displayed on the larger
+    flows. The title "Energy Flow · sankey-basic · plotnine · pyplots.ai" is prominently
+    displayed at the top. "Sources" and "Sectors" labels appear at the bottom. The
+    color scheme uses distinct colors for each source and target, with flows colored
+    by their source. The layout is well-balanced with good use of the 16:9 canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, node labels are clearly readable with good
+          font size (16pt), flow values are visible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned outside nodes; flow
+          values are staggered to avoid overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Flow bands are well-sized with appropriate alpha (0.5) for overlapping
+          areas; nodes have white borders for clarity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color variety; sources and targets have distinct colors; however
+          some flows crossing may be hard to distinguish for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization; plot fills appropriate portion of space
+          with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels present (appropriate for Sankey diagrams which don't
+          use traditional axes, but "Sources" and "Sectors" annotations help)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for Sankey); legend hidden as colors are self-explanatory
+          with labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Sankey diagram with nodes and flows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Source/target/value correctly mapped; flow widths proportional to
+          values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: flows from sources to targets, width
+          proportional to value, distinct colors, node labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All flows and nodes visible; appropriate scaling
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; inline labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Energy Flow · sankey-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied flow magnitudes (8-35), multiple sources connecting
+          to multiple targets, demonstrates both major and minor flows
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Energy flow from sources (Coal, Gas, Nuclear, Renewables) to sectors
+          (Industrial, Commercial, Residential) is a classic, real-world Sankey application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable energy flow units; total of 160 distributed
+          across 9 flows is appropriate
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code follows imports → data → plot → save pattern, but has some complexity
+          due to manual polygon construction (acceptable for Sankey)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (no random generation needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' correctly
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Creative use of plotnine's grammar of graphics (geom_polygon for
+          flows, geom_rect for nodes, geom_text for labels, scale_fill_manual, theme
+          customization); impressive implementation of Sankey in a library without
+          native support
+  verdict: APPROVED
diff --git a/plots/sankey-basic/metadata/seaborn.yaml b/plots/sankey-basic/metadata/seaborn.yaml
index 8c59fef818..2fc2241093 100644
--- a/plots/sankey-basic/metadata/seaborn.yaml
+++ b/plots/sankey-basic/metadata/seaborn.yaml
@@ -27,3 +27,185 @@ review:
     native Sankey support
   - Source totals are too uniform (Coal=Gas=60 TWh) - more variety in magnitudes would
     better demonstrate proportional scaling
+  image_description: The plot displays a Sankey diagram showing energy flow from three
+    sources (Coal, Gas, Nuclear) on the left to three sectors (Industrial, Residential,
+    Commercial) on the right. The title "sankey-basic · seaborn · pyplots.ai" is displayed
+    at the top in bold black text. Source nodes are colored pink (Coal), green (Gas),
+    and blue (Nuclear), each with labels showing the energy source name and total
+    TWh value. Target nodes are colored in softer pastel tones - teal (Residential),
+    light purple (Industrial), and orange (Commercial). Flow bands connect sources
+    to targets with smooth bezier curves, colored by source with 65% opacity. Two
+    legends at the bottom identify "Energy Sources" (left) and "Sectors" (right).
+    Node labels include values in TWh units. The overall layout is clean with a white
+    background.
+  criteria_checklist:
+    visual_quality:
+      score: 33
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text clearly readable, good font sizes (18pt labels, 26pt title)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements, labels positioned cleanly
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: flow bands clearly visible with good opacity (0.65), nodes well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: uses husl and Set2 palettes; pink/green/blue source distinction is
+          good but pink (Coal) and orange (Commercial) could be confused by some colorblind
+          users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good use of canvas space, legends positioned at bottom, slight imbalance
+          with more whitespace at bottom
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: two well-placed legends with clear formatting
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct Sankey diagram implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: sources, targets, and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: flow widths proportional to values, distinct colors for sources
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all nodes and flows visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legends correctly identify sources and sectors
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: true
+        comment: uses correct format "sankey-basic · seaborn · pyplots.ai" ✓ (Actually
+          correct, updating to 2/2)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: shows multiple sources and targets with varying flow magnitudes,
+          but two of the three sources have equal totals (60, 60, 45 TWh) - could show more variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: energy flow scenario is realistic and comprehensible (coal, gas,
+          nuclear to residential/commercial/industrial)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: TWh values are reasonable for energy flows, though relative proportions
+          are somewhat uniform
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: code is longer than typical KISS style but maintains linear structure
+          without classes; uses procedural approach which is acceptable
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: no deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: saves as 'plot.png' ✓ (Actually correct)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: correctly saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: seaborn is only used for styling (sns.set_theme, color_palette) but
+          no seaborn plot functions are called. The actual Sankey is built with matplotlib
+          patches and fill_between. This is a significant weakness as seaborn doesn't
+          natively support Sankey diagrams.
+  verdict: APPROVED
diff --git a/plots/scatter-3d/metadata/altair.yaml b/plots/scatter-3d/metadata/altair.yaml
index 65f6f7f952..41d78dabe5 100644
--- a/plots/scatter-3d/metadata/altair.yaml
+++ b/plots/scatter-3d/metadata/altair.yaml
@@ -25,3 +25,186 @@ review:
     3D axes more clearly
   - Uses deprecated selection_interval(bind=scales) API; should use chart.interactive()
     for simpler approach
+  image_description: "The plot displays a 3D scatter plot projected into 2D using\
+    \ an isometric projection. It shows three distinct clusters of points:\n- **Cluster\
+    \ 1** (blue, #306998): Located in the upper-right area of the canvas, containing\
+    \ approximately 50 points\n- **Cluster 2** (yellow, #FFD43B): Located in the upper-left\
+    \ area of the canvas, containing approximately 50 points  \n- **Cluster 3** (orange,\
+    \ #E07B39): Located in the lower-center to lower-right area, containing approximately\
+    \ 50 points\n\nThe title \"scatter-3d · altair · pyplots.ai\" is clearly displayed\
+    \ at the top with a subtitle \"Isometric projection with depth-based opacity\"\
+    . The X-axis is labeled \"Projected X\" and the Y-axis is labeled \"Projected\
+    \ Z\". Points have varying opacity based on depth, with white stroke borders.\
+    \ A legend is positioned in the top-right corner showing all three cluster labels.\
+    \ The grid is subtle with dashed lines."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized at 280 with white stroke for distinction; depth-based
+          opacity works well (-1 for some overlap in dense cluster areas)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and orange palette is colorblind-safe (avoids red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with three well-separated clusters, slight
+          gap in center (-1)
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Projected X", "Projected Z") but lack units
+          (-1)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle with alpha=0.3 and dashed style, legend well-placed;
+          however axis title "X-Y Plane (Horizontal)" in code doesn't match rendered
+          "Projected X" (-1)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D scatter using isometric projection (appropriate for 2D
+          library)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly mapped and projected
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has 3D visualization, color encoding (cluster), transparency for
+          depth; interactive pan/zoom in HTML; tooltips show original X/Y/Z values
+          (-1 no color encoding of a fourth numeric variable, which the spec suggests as optional)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Cluster 1/2/3" matching data
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Uses correct format "scatter-3d · altair · pyplots.ai" with appropriate
+          separators; subtitle is a nice addition (-1 minor: subtitle adds complexity)'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct 3D clusters with spatial separation, demonstrates
+          the purpose of 3D scatter; depth ordering visible (-1 could show more varied
+          cluster shapes)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Clustered data in 3D space is a plausible ML/statistics scenario
+          (-1 generic "Cluster 1/2/3" labels vs domain-specific)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in reasonable range (-5 to 6), typical for normalized/standardized
+          data
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → projection → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only imports altair, numpy, pandas (all used)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: '`alt.selection_interval` with `bind="scales"` is deprecated; should
+          use `alt.selection_point` or `chart.interactive()` instead'
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair''s declarative encoding: opacity encoding
+          for depth, order encoding for painter''s algorithm, tooltips, interactive
+          pan/zoom selection, subtitle in title, configure_axis for grid styling'
+  verdict: APPROVED
diff --git a/plots/scatter-3d/metadata/bokeh.yaml b/plots/scatter-3d/metadata/bokeh.yaml
index 81960ce382..68b63155d8 100644
--- a/plots/scatter-3d/metadata/bokeh.yaml
+++ b/plots/scatter-3d/metadata/bokeh.yaml
@@ -26,3 +26,181 @@ review:
   - Colorbar title is somewhat generic; could clarify relationship to Z-axis
   - Grid is visible in the 2D projection space which slightly conflicts with the 3D
     axis visualization aesthetic
+  image_description: 'The plot displays a 3D scatter visualization using an isometric
+    projection onto a 2D canvas. Three dark gray axis lines with arrow tips emanate
+    from a central origin point, labeled "X-Axis (units)", "Y-Axis (units)", and "Z-Axis
+    (units)". The scatter points (150 total) form three distinct clusters in 3D space,
+    projected to 2D. Points are colored using the Viridis colormap based on their
+    Z-value (elevation), ranging from deep purple (low Z ~-2) through green to yellow
+    (high Z ~4). A vertical color bar on the right side is titled "Color: Elevation"
+    with numeric scale markers. The title "scatter-3d · bokeh · pyplots.ai" appears
+    at the top left. The background is a light off-white (#f9f9f9) with subtle dashed
+    grid lines. Marker sizes vary based on depth to create a pseudo-3D effect.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title 48pt is excellent, axis labels at 44pt are very readable,
+          colorbar text is clear. Slightly deducted because colorbar title "Color:
+          Elevation" is rotated and harder to read at a glance.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated from data
+          and each other.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized 15-35px are visible but slightly small for 150 points
+          at this canvas size. Alpha 0.8 works well. Could be slightly larger.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis palette is colorblind-safe and provides excellent contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though the plot is slightly left-heavy
+          with more whitespace on the right near the colorbar.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All three axes have descriptive labels with "(units)" indicating
+          unit placeholder.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.25) which is good, but slightly visible.
+          Colorbar is well placed.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D scatter plot implemented via isometric projection (appropriate
+          since Bokeh lacks native 3D).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly assigned to three dimensions with proper projection.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Three numeric variables plotted, color encoding for fourth dimension
+          present, clusters visible.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes scaled appropriately with good padding.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents Z-value mapping with correct scale.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title correctly formatted as "scatter-3d · bokeh · pyplots.ai".
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters demonstrating spatial relationships.
+          Could show more variation in cluster density or sizes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Generic clustered data is plausible for demonstrating 3D visualization,
+          but context is abstract rather than domain-specific.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible range (-2 to 4), typical for normalized/standardized
+          data.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → projection → plot → save. No functions/classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducible results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh components).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, ColorMapper, interactive tools (pan, zoom,
+          box_zoom, reset, save). However, the plot is saved as static PNG which limits
+          the interactivity benefit. The HTML export does provide interactive capability.
+  verdict: APPROVED
diff --git a/plots/scatter-3d/metadata/highcharts.yaml b/plots/scatter-3d/metadata/highcharts.yaml
index 306e0e0c97..4c52c760dd 100644
--- a/plots/scatter-3d/metadata/highcharts.yaml
+++ b/plots/scatter-3d/metadata/highcharts.yaml
@@ -29,3 +29,180 @@ review:
     3D feature mentioned in the spec
   - Uses raw JavaScript config rather than highcharts-core Python library, making
     it less maintainable
+  image_description: 'The plot displays a 3D scatter visualization with three distinct
+    clusters rendered in Highcharts. The title "scatter-3d · highcharts · pyplots.ai"
+    appears at the top in bold with a subtitle "Three-dimensional clustered data visualization".
+    Three clusters are shown: Cluster A (dark blue/Python Blue, #306998) positioned
+    in the upper-right area, Cluster B (yellow/Python Yellow, #FFD43B) in the middle-left
+    area, and Cluster C (teal, #17BECF) scattered in the lower portion. Each axis
+    is labeled with units: "X Position (units)", "Y Position (units)", and "Z Position
+    (units)". The 3D perspective includes subtle blue-tinted frame planes at the bottom,
+    back, and side. A vertical legend on the right identifies the three clusters.
+    The overall layout is clean with good use of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, tick marks, and legend are all clearly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels are well-positioned and fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (radius 14) with good opacity (0.8); clusters
+          are clearly distinguishable, though some markers in Cluster C slightly overlap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, teal) with no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; plot fills ~60% of canvas; slight imbalance
+          with more whitespace on left
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "X Position (units)",
+          etc.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; however legend is positioned far
+          to the right with significant empty space between the chart and legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D scatter plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly mapped to three spatial dimensions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 3D visualization, clustering, transparency, color encoding all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range with appropriate tick intervals
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three clusters
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "scatter-3d · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters with different spatial distributions;
+          demonstrates 3D spatial relationships well, but all clusters have similar
+          spread
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic "Position" labels work for demonstrating 3D scatter but lack
+          a specific real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for demonstrating 3D relationships; -5 to 6 range
+          appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses direct JavaScript chart config instead of highcharts-core Python
+          library, which is unconventional but functional
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts 3D module with scatter3d type, 3D frame planes, interactive
+          tooltips, and hover states; however, interactive rotation (drag-to-rotate)
+          which is Highcharts' signature 3D feature is not explicitly enabled
+  verdict: APPROVED
diff --git a/plots/scatter-3d/metadata/letsplot.yaml b/plots/scatter-3d/metadata/letsplot.yaml
index c80df9cbf7..027b6022d4 100644
--- a/plots/scatter-3d/metadata/letsplot.yaml
+++ b/plots/scatter-3d/metadata/letsplot.yaml
@@ -26,3 +26,180 @@ review:
     the projection is manually implemented
   - Realistic context could be more specific (e.g., meteorological sensors or geological
     survey points)
+  image_description: The plot displays a 3D scatter plot rendered as a 2D isometric
+    projection. Three distinct clusters of points are visible, each colored according
+    to altitude (z-value) using the viridis colormap ranging from purple (low, ~0m)
+    to yellow (high, ~4m). The upper-left cluster shows yellow/green points at higher
+    altitudes. The lower-left cluster displays purple points at lower altitudes. The
+    right-center cluster shows cyan/teal points at intermediate altitudes. A floor
+    grid provides depth perception, and three axis lines (X, Y, Z) emanate from the
+    origin with bold labels. The title "scatter-3d · letsplot · pyplots.ai" appears
+    at the top. Axis labels read "Projected X (m)" and "Projected Y (m)". The legend
+    shows "Altitude (m)" with the viridis color scale.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, legend text, and axis markers (X,
+          Y, Z) are all clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and data points are well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Points are visible with good alpha (0.75), but some overlapping in
+          the dense right-center cluster; size variation based on depth is a nice
+          touch
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; floor grid and axis
+          lines create good spatial context
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Projected X (m)", "Projected Y (m)",
+          "Altitude (m)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle, but the standard axis grid lines are
+          removed (panel_grid=element_blank()) while only the floor grid remains;
+          legend is well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D scatter plot type with isometric projection
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly mapped; color encoding for altitude adds fourth
+          dimension as spec suggests
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3D visualization, color encoding, point
+          transparency, clear axis labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within the plot range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Altitude (m)" with accurate color scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-3d · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 3 distinct clusters with different shapes (spherical, elongated,
+          flat disk) demonstrating various 3D spatial relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: '"Sensor readings in a volume" is plausible but somewhat generic;
+          could be more specific'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible ranges (-6 to 6 meters) for spatial measurements
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar, viridis scale, layer_tooltips for interactivity,
+          but the 3D projection is custom math rather than a lets-plot feature; no
+          native 3D support utilized
+  verdict: APPROVED
diff --git a/plots/scatter-3d/metadata/matplotlib.yaml b/plots/scatter-3d/metadata/matplotlib.yaml
index 365309fda5..30032bd5c3 100644
--- a/plots/scatter-3d/metadata/matplotlib.yaml
+++ b/plots/scatter-3d/metadata/matplotlib.yaml
@@ -26,3 +26,177 @@ review:
   - Axis label font size at 18pt is slightly below the recommended 20pt for optimal
     legibility
   - Could benefit from subtle grid lines to help viewers judge positions in 3D space
+  image_description: The plot displays a 3D scatter plot with points distributed in
+    three-dimensional space representing protein structure data. The title "Protein
+    Structure · scatter-3d · matplotlib · pyplots.ai" appears at the top. Three axes
+    are labeled "X Coordinate (Å)", "Y Coordinate (Å)", and "Z Coordinate (Å)" with
+    tick marks showing values roughly from -2 to 12 on each axis. Points are colored
+    using the viridis colormap (purple to green to yellow) based on their Z-coordinate
+    (elevation), with a colorbar on the left labeled "Elevation (Å)" ranging from
+    0 to approximately 11. The points show three distinct clusters at different locations
+    in 3D space. Each point has a white edge and partial transparency (alpha=0.7).
+    The viewing angle shows the plot from an elevated perspective (elev=25, azim=45).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 18pt, tick labels at 14pt - all readable
+          but axis labels slightly under the recommended 20pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points at s=120 with alpha=0.7 are well-sized for 150 points, white
+          edges help distinguish overlapping points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with colorbar on left creating
+          asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units (Å)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Pane styling is subtle but no explicit grid lines shown
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D scatter plot using matplotlib's projection="3d"
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly assigned to three dimensions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has color encoding for 4th dimension, point transparency for overlapping,
+          clear axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents elevation values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows `{description} · {spec-id} · {library} · pyplots.ai`
+          format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters demonstrating spatial relationships,
+          color encoding shows z-variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Protein structure with atomic coordinates in Angstroms is an excellent
+          scientific context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for molecular coordinates (0-12 Å range), though
+          typical protein domains might span larger ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves to 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses matplotlib's 3D projection capability with basic customization
+          (view_init, pane styling), but doesn't leverage advanced features like surface
+          plots, custom colormaps, or annotations
+  verdict: APPROVED
diff --git a/plots/scatter-3d/metadata/plotly.yaml b/plots/scatter-3d/metadata/plotly.yaml
index 45c5d0074f..79fcffd0a1 100644
--- a/plots/scatter-3d/metadata/plotly.yaml
+++ b/plots/scatter-3d/metadata/plotly.yaml
@@ -22,3 +22,177 @@ review:
   - Colorbar title font could be larger for better visibility at full resolution
   - Data uses generic coordinate labels rather than a more compelling real-world scenario
   - Could leverage Plotly's hover templates for richer interactivity
+  image_description: 'The plot displays a 3D scatter plot with points distributed
+    across three-dimensional space. The title "scatter-3d · plotly · pyplots.ai" is
+    centered at the top in black text. The plot uses the Viridis colorscale where
+    colors range from dark purple (low Z values around -10) through teal/blue (mid
+    Z values around 0) to bright yellow/green (high Z values around 10). Three distinct
+    clusters are visible: one cluster in the upper region with yellow-green points
+    (high Z), one cluster in the middle-lower area with teal/blue points (mid Z),
+    and one cluster with purple/blue points (low Z). All three axes are labeled: "X
+    Coordinate (units)", "Y Coordinate (units)", and "Z Coordinate (units)". A vertical
+    colorbar on the right shows "Z Value (units)" with tick marks from -10 to 5. The
+    background is white with subtle gray gridlines. Point size is uniform and markers
+    have good opacity (0.8). The camera angle provides a good 3D perspective view.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and readable, axis labels are clear at 18pt, tick
+          fonts at 14pt are readable but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized with appropriate alpha (0.8), but for 150
+          points the size could be slightly optimized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colorscale is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, but colorbar placement leaves some unused
+          space on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at 0.3 alpha, colorbar serves as legend but could
+          have better positioning
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly assigned to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3D scatter, color encoding for 4th dimension,
+          clusters, proper axis labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly indicates Z value mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 3 distinct clusters demonstrating spatial relationships, but
+          could show more varied cluster densities
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic coordinate system is acceptable for demonstrating 3D relationships,
+          but not a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in reasonable range (-10 to +10)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Scatter3d for 3D visualization and writes HTML for interactivity,
+          but doesn't leverage hover templates or animation
+  verdict: APPROVED
diff --git a/plots/scatter-animated-controls/metadata/altair.yaml b/plots/scatter-animated-controls/metadata/altair.yaml
index b8868069eb..189c5c73fd 100644
--- a/plots/scatter-animated-controls/metadata/altair.yaml
+++ b/plots/scatter-animated-controls/metadata/altair.yaml
@@ -25,3 +25,184 @@ review:
   - Legend placement creates slight right-margin crowding; could benefit from more
     padding
   - Missing interactive() call for HTML output which would enable zoom/pan exploration
+  image_description: 'The plot displays a faceted scatter visualization with four
+    horizontal panels showing years 2000, 2007, 2014, and 2021. Each panel contains
+    scatter points representing 12 countries plotted with GDP per Capita (thousands
+    USD) on the x-axis (0-50 range) and Life Expectancy (years) on the y-axis (45-90
+    range). Points are colored by region: blue (Region 1), golden yellow (Region 2),
+    and olive green (Region 3). Point size varies to represent population in millions
+    (0-200 scale shown in legend). The main title "scatter-animated-controls · altair
+    · pyplots.ai" appears at the top with a gray subtitle "Country Development Metrics
+    Across Key Years (Static Faceted View)". Two legends on the right display Region
+    colors and Population size scale. The visualization effectively shows temporal
+    progression of development metrics across the four time periods.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, year labels, and axis text are clearly readable; legend text
+          slightly on the smaller side
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean separation between facets
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized for data density with appropriate opacity; some
+          overlap between points in denser regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and olive green palette is colorblind-friendly, good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of horizontal space for facets; plot area could be slightly
+          taller
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "GDP per Capita (thousands USD)"
+          and "Life Expectancy (years)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; two legends positioned well but slightly crowd the
+          right margin
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct faceted scatter plot as alternative to animation (spec allows
+          static faceted version)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=GDP, Y=Life Expectancy, Size=Population, Color=Region, Time=Facet
+          columns
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows temporal evolution via facets; lacks explicit play/pause controls
+          but spec notes "Libraries without animation support should implement a static
+          faceted version"
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points clearly (0-50 for GDP, 45-90 for life expectancy)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Region and Population legends are accurate and clear
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format "scatter-animated-controls · altair · pyplots.ai";
+          deduction cites middle dot vs interpunct, but both name the same "·" character
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 12 countries across 4 years with 3 regions; clear temporal
+          progression visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gapminder-style country development data is realistic and neutral
+          (GDP, life expectancy, population)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: GDP 3-45K USD, life expectancy 50-85 years, population 25-200M are
+          all realistic values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → chart creation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html (correct for Altair)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of Altair''s declarative grammar: faceting, encodings,
+          tooltips, scale configuration; could have used interactive() for zoom/pan
+          in HTML'
+  verdict: APPROVED
diff --git a/plots/scatter-animated-controls/metadata/bokeh.yaml b/plots/scatter-animated-controls/metadata/bokeh.yaml
index 7a5f014b08..4061a7a4b5 100644
--- a/plots/scatter-animated-controls/metadata/bokeh.yaml
+++ b/plots/scatter-animated-controls/metadata/bokeh.yaml
@@ -26,3 +26,183 @@ review:
     plot, making it harder to associate legend entries with data points
   - Grid styling with dashed lines could be slightly more subtle (alpha 0.2 instead
     of 0.3)
+  image_description: 'The plot displays a Gapminder-style animated scatter plot showing
+    country development data for the year 2014 (middle frame). The visualization features
+    15 data points (countries) plotted with GDP per Capita (USD) on the x-axis (ranging
+    0-80,000) and Life Expectancy (Years) on the y-axis (ranging 40-95). Points are
+    color-coded by region using a 5-color palette: blue (North), yellow (South), coral/salmon
+    (East), teal (West), and green (Central). Point sizes vary based on population,
+    with larger bubbles indicating higher populations. A large watermark "2014" appears
+    in light gray in the lower-right portion of the plot. The legend is positioned
+    in the top-left corner with clear region labels. The title follows the required
+    format: "scatter-animated-controls · bokeh · pyplots.ai". The background is a
+    subtle off-white (#fafafa) with dashed grid lines at low opacity.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-positioned, year watermark
+          doesn't interfere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubble sizes well-scaled with population encoding, alpha=0.7 appropriate
+          for 15 points, white outlines help distinguish overlapping bubbles
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct, colorblind-friendly colors (blue, yellow, coral, teal,
+          green) with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, adequate margins, legend inside plot area
+          works well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "GDP per Capita (USD)" and "Life
+          Expectancy (Years)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha=0.3, but legend glyph
+          circles are too small relative to the actual bubble sizes in the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct animated scatter plot with bubble sizing
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=GDP, Y=Life Expectancy, Size=Population, Color=Region - all correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Play/pause button, timeline slider, year display, color coding, size
+          encoding all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range of data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 regions with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-animated-controls · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in GDP, life expectancy, and population across regions;
+          good spread of data points demonstrating the Gapminder concept
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated country development data with plausible GDP ranges (5K-60K),
+          life expectancy (58-78 years), and regional groupings
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for country-level metrics, though some GDP values
+          on the higher end
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Code is mostly linear but has some complexity with data generation
+          loops; acceptable for animation requirements
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, CustomJS callbacks for interactivity,
+          factor_cmap for categorical coloring, HoverTool. Could leverage more Bokeh-specific
+          animation features.
+  verdict: APPROVED
diff --git a/plots/scatter-animated-controls/metadata/highcharts.yaml b/plots/scatter-animated-controls/metadata/highcharts.yaml
index f51296eb91..baf979a930 100644
--- a/plots/scatter-animated-controls/metadata/highcharts.yaml
+++ b/plots/scatter-animated-controls/metadata/highcharts.yaml
@@ -23,3 +23,182 @@ review:
   - Year displayed in two places (subtitle and control panel) - slightly redundant
   - Right side of chart has unused space (x-axis max 100,000 but data only reaches
     ~80,000)
+  image_description: 'The plot shows a Gapminder-style animated bubble chart with
+    GDP per Capita (USD) on the x-axis (ranging from $0 to $80,000+) and Life Expectancy
+    (Years) on the y-axis (ranging from 40 to 90). The chart displays 20 countries
+    as bubbles, color-coded by 4 regions: Region 1 (blue), Region 2 (yellow), Region
+    3 (purple), and Region 4 (cyan). Bubble sizes represent population. The title
+    correctly shows "scatter-animated-controls · highcharts · pyplots.ai" with a subtitle
+    displaying "Year: 2000". At the bottom, there''s a control panel with a blue "▶
+    Play" button, a "Year:" label, a timeline slider, and the current year "2000"
+    displayed in blue. The legend is positioned horizontally at the top. The overall
+    layout is clean with a white background and subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and controls are all readable. Tick
+          labels could be slightly larger but are acceptable.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; bubbles have some overlap but that's
+          expected for this visualization type.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bubbles are well-sized with good opacity (0.75). Some smaller bubbles
+          are quite small but visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette: blue (#306998), yellow (#FFD43B),
+          purple (#9467BD), cyan (#17BECF).'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, controls nicely positioned at bottom. Slight
+          right-side empty space due to x-axis max at 100,000.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"GDP per Capita (USD)" and "Life Expectancy (Years)" with units.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend well-placed but could use larger
+          symbols.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct animated bubble chart (scatter with size encoding).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=GDP, Y=Life Expectancy, Size=Population, Color=Region - all correctly
+          assigned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Play/pause button ✓, timeline slider ✓, year display (large subtitle
+          + control panel) ✓, smooth transitions ✓.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range (0-100K GDP, 40-90 life expectancy).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Region labels match data correctly.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format, but the year is shown twice (in the subtitle
+          and in the control panel).
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 20 countries across 20 years with varying GDP growth rates,
+          life expectancy improvements, and population sizes. Good variety but all
+          trends are generally positive.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gapminder-style country data is a perfect, neutral, real-world scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: GDP values (5K-100K), life expectancy (45-85), populations (10M-500M)
+          are realistic. Some values on the high end.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → chart config → HTML → screenshot.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (json, tempfile, time, urllib, Path, numpy,
+          selenium).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses manual JS animation rather than Highcharts motion module (acceptable
+          given complexity).
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Leverages Highcharts bubble chart, custom JS animation, interactive
+          controls. Could have used Highcharts accessibility module.
+  verdict: APPROVED
diff --git a/plots/scatter-animated-controls/metadata/letsplot.yaml b/plots/scatter-animated-controls/metadata/letsplot.yaml
index eaeea2d4ad..9ea10c5ee8 100644
--- a/plots/scatter-animated-controls/metadata/letsplot.yaml
+++ b/plots/scatter-animated-controls/metadata/letsplot.yaml
@@ -26,3 +26,188 @@ review:
     facet strip labels work but larger text could enhance the visualization
   - Some point overlap in the 2019 panel where multiple countries converge at higher
     GDP/life expectancy values
+  image_description: 'The plot displays a faceted scatter visualization with 5 panels
+    showing key years (2000, 2005, 2010, 2015, 2019). Each panel shows GDP per Capita
+    on a logarithmic x-axis (ranging from ~3,162 to ~31,623 USD) vs Life Expectancy
+    on the y-axis (56-80 years). Ten fictional countries are represented by different
+    colored circles using the "Paired" color palette: Northland (light blue), Eastoria
+    (dark blue), Westopia (light green), Southaven (dark green), Centralia (pink),
+    Alpinia (red), Deltania (light orange), Oceanica (dark orange), Valleysia (light
+    purple), and Highlands (dark purple). Point sizes encode population (ranging from
+    ~50M to ~150M as shown in legend). The title "scatter-animated-controls · lets-plot
+    · pyplots.ai" is displayed prominently at the top. A legend on the right shows
+    both population size scale and country color mapping. The plot uses a minimal
+    theme with subtle gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large (~28pt), axis labels ~18pt, tick labels ~14pt,
+          strip text ~18pt bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; facet labels, axis labels, and legend
+          are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (6-20 range) with good alpha (0.85), though
+          some overlap in crowded years like 2019
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Paired palette provides good distinction but some similar hues (light
+          green/dark green, pinks/reds) may be harder for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 1600x900 canvas scaled 3x; facets fill space well,
+          legend positioned cleanly on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"GDP per Capita (log scale, USD)" and "Life Expectancy (years)"
+          - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (minimal theme), but legend shows two separate sections
+          which works well
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: faceted scatter plot as fallback for animation (per spec:
+          "Libraries without animation support should implement a static faceted version")'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=GDP, Y=Life Expectancy, Size=Population, Color=Country - all correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Shows temporal evolution via facets, size encoding, color grouping.
+          Missing: no visible "current time/year" large background text as spec suggests,
+          though facet strip labels serve similar purpose'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, log scale handles GDP spread well
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Both Population (M) and Country legends are accurate and match data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"scatter-animated-controls · lets-plot · pyplots.ai" - correct format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows clear temporal progression with GDP growth and life expectancy
+          improvements over 20 years; different country trajectories visible. Minor:
+          some countries cluster similarly'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gapminder-style country data is an excellent, neutral, educational
+          scenario with realistic metrics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: GDP ($3,000-$50,000) and life expectancy (56-80 years) are realistic
+          ranges; population scale reasonable
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean: imports → data generation → plot → save; no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Imports are appropriate, though `# noqa: F405` needed for wildcard
+          import'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of ggplot2-style grammar: `ggplot()`, `aes()`, `geom_point()`,
+          `facet_wrap()`, `scale_x_log10()`, `scale_color_brewer()`, `scale_size()`,
+          `theme_minimal()`, and `ggsize()`. Could have used additional lets-plot
+          features like tooltips'
+  verdict: APPROVED
diff --git a/plots/scatter-animated-controls/metadata/plotly.yaml b/plots/scatter-animated-controls/metadata/plotly.yaml
index 2a53a7af80..0bb591606b 100644
--- a/plots/scatter-animated-controls/metadata/plotly.yaml
+++ b/plots/scatter-animated-controls/metadata/plotly.yaml
@@ -28,3 +28,178 @@ review:
     in some frames
   - Year watermark annotation shows static 2000 and does not update with animation
     frames
+  image_description: 'The plot displays a Gapminder-style animated scatter visualization
+    showing 8 countries across 4 regions (North, South, East, West). The x-axis shows
+    "GDP per Capita ($)" ranging from 0 to 40k, and the y-axis shows "Life Expectancy
+    (years)" ranging from 50 to 85. Bubble sizes represent population. The first animation
+    frame (Year 2000) is shown with a large watermark "2000" in the top-right corner.
+    Four colors are used: blue for North, yellow/gold for South, red/coral for East,
+    and green for West. Play/Pause buttons are visible at the bottom-left, and a timeline
+    slider spans the bottom showing years 2000-2020. The current year "Year: 2000"
+    is displayed below the x-axis label. The title "scatter-animated-controls · plotly
+    · pyplots.ai" is centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, ticks at 18pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend well-positioned in top-left
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized with size_max=80, good opacity at 0.85, though
+          some markers in the low-GDP cluster overlap slightly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, red, green) with good contrast
+          against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with most data clustered on
+          left side of plot area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "GDP per Capita ($)" and "Life Expectancy
+          (years)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is very subtle which is good, but legend could be positioned
+          better to not overlap plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct animated scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: GDP on X, Life Expectancy on Y, Population as size, Region as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Play/Pause controls, timeline slider, year display (watermark + slider
+          label), smooth transitions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Region categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "scatter-animated-controls · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows growth trends over time, regional groupings, population variation;
+          could show more dramatic changes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gapminder-style country data with GDP, life expectancy, and population
+          - classic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: GDP 3k-20k, life expectancy 58-78 years, populations 20-120M are
+          reasonable; some base values could be more varied
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, plotly.express imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly Express API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of px.scatter animation_frame, custom updatemenus for Play/Pause,
+          custom sliders. Could use hover_data more extensively or add animation trails.
+  verdict: APPROVED
diff --git a/plots/scatter-animated-controls/metadata/pygal.yaml b/plots/scatter-animated-controls/metadata/pygal.yaml
index a44bb906d7..9e4c9f9546 100644
--- a/plots/scatter-animated-controls/metadata/pygal.yaml
+++ b/plots/scatter-animated-controls/metadata/pygal.yaml
@@ -26,3 +26,169 @@ review:
     indicator dots should be larger for better readability
   - Some of the smaller data points (particularly the tiny ones for Region 3) could
     benefit from slightly larger minimum sizes
+  image_description: 'The plot displays a scatter plot showing country development
+    metrics for year 2021. It features 12 data points representing countries, plotted
+    with GDP per Capita (thousands USD) on the x-axis (ranging 0-50) and Life Expectancy
+    (years) on the y-axis (ranging ~50-90). Points are color-coded by region: Region
+    1 in dark blue (#306998), Region 2 in yellow/gold (#FFD43B), and Region 3 in olive
+    green (#6B8E23). Point sizes vary based on population, with the largest point
+    being a green circle around (30, 80). A large semi-transparent "2021" watermark
+    appears in the lower-right area of the plot. The title correctly shows "scatter-animated-controls
+    · pygal · pyplots.ai". The legend appears at the bottom with all three regions.
+    Grid lines are subtle and dotted.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks all clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Markers are visible with good size variation encoding population,
+          though some smaller points could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and olive green are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills appropriate area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "GDP per Capita (thousands USD)"
+          and "Life Expectancy (years)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend at bottom uses tiny colored squares that are barely visible;
+          the legend markers should be larger
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter plot for Gapminder-style visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=GDP, Y=Life Expectancy, Size=Population, Color=Region correctly
+          mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has slider controls in HTML, year display as watermark, but PNG preview
+          is static (acceptable per spec note)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Region 1, 2, 3
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: false
+        comment: Title format is correct but the legend marker squares are too small
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in GDP, life expectancy, population size, and regional
+          groupings across 12 countries
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gapminder-style country development metrics is a classic, neutral,
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (GDP 3-43k, Life expectancy 57-85 years), though
+          some high GDP values push limits
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean linear flow with data generation, chart creation, and output
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (pygal, numpy, cairosvg, PIL, json, io)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/altair.yaml b/plots/scatter-annotated/metadata/altair.yaml
index 9d6fb8e8fb..55e256e629 100644
--- a/plots/scatter-annotated/metadata/altair.yaml
+++ b/plots/scatter-annotated/metadata/altair.yaml
@@ -22,3 +22,179 @@ review:
   - Missing connector lines/arrows from labels to points as mentioned in specification
     notes
   - Could leverage more Altair-specific features like conditional formatting or selections
+  image_description: The plot displays a scatter plot with 10 tech company data points
+    showing Revenue (Billions USD) on the x-axis (ranging from 0-640) and Profit Margin
+    (%) on the y-axis (ranging from 0-60). Each point is rendered as a blue circle
+    (#306998) with moderate opacity. Company names are displayed as bold dark text
+    labels positioned to the upper-right of each point. Companies shown include NVIDIA
+    (high profit margin ~55%, low revenue ~61B), Apple (high revenue ~385B, moderate
+    margin ~25%), Microsoft, Amazon (highest revenue ~574B, lowest margin ~6%), Google,
+    Meta, Adobe, Oracle, Tesla, and Intel. The title "scatter-annotated · altair ·
+    pyplots.ai" appears centered at the top. Grid lines are subtle with low opacity.
+    The overall layout is clean and balanced.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (28pt), axis labels are 22pt, tick labels are 18pt,
+          all clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned with dx=12, dy=-8
+          offset
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size=250 with opacity=0.7 is appropriate for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall balance, but data clusters in left-center leaving right
+          side sparse (this is data-driven, not a layout issue per se)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Revenue (Billions USD)" and "Profit Margin
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (0.3 opacity), but no legend is present (though not
+          strictly needed for this single-series plot)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with text annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=revenue, Y=profit_margin correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has text labels near points; however, spec mentions "subtle connecting
+          lines or arrows from labels to points when offset" which is missing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding (0-620 for x, 0-60 for
+          y)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variation: high margin/low revenue (NVIDIA), low margin/high
+          revenue (Amazon), and middle performers'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real tech companies with realistic financial metrics; neutral business
+          topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue and profit margin values are realistic for these companies
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → points layer → labels layer →
+          combine → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic arrays
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses Altair's layered grammar (points + labels), tooltip encoding,
+          and declarative style. However, could leverage more Altair-specific features
+          like interactive selections, conditional encodings, or text adjustments
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/bokeh.yaml b/plots/scatter-annotated/metadata/bokeh.yaml
index 6ce2b581d8..f4d587fa0a 100644
--- a/plots/scatter-annotated/metadata/bokeh.yaml
+++ b/plots/scatter-annotated/metadata/bokeh.yaml
@@ -28,3 +28,182 @@ review:
     could benefit from collision avoidance
   - Does not leverage Bokeh's distinctive hover tooltip feature which would enhance
     the visualization
+  image_description: The plot displays a scatter chart with 15 blue circular markers
+    representing tech companies, plotted on a light gray background (#fafafa). The
+    x-axis shows "Revenue (Billions $)" ranging from approximately 4 to 26, and the
+    y-axis shows "Market Cap (Billions $)" ranging from approximately 18 to 100. Each
+    data point has a bold text label positioned to the upper-right of the marker,
+    displaying company names like "WebScale", "CloudNet", "TechCorp", etc. The title
+    "scatter-annotated · bokeh · pyplots.ai" appears centered at the top. A subtle
+    dashed grid (alpha 0.3) provides reference lines. The markers use a Python blue
+    color (#306998) with ~0.7 alpha and darker outlines. The plot shows a clear positive
+    correlation between revenue and market cap.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24pt, annotations
+          at 24pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor label proximity issues between "ByteWorks"/"CyberLink" and
+          "SoftPeak"/"NetFlow", but all remain readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size=40 with alpha=0.7 is perfect for 15 data points, line_width=3
+          outline adds clarity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998) with good contrast against light background,
+          no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, data spread across the space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Revenue (Billions
+          $)", "Market Cap (Billions $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (not strictly needed for single series), but spec
+          mentions connecting lines from labels to points which are absent
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with text annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=revenue, Y=market cap, labels=company names - all correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Text annotations present but missing "subtle connecting lines or
+          arrows from labels to points when offset" mentioned in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · bokeh · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows range from low revenue/low cap (DevOps: 4.5B/18.9B) to high
+          revenue/high cap (WebScale: 25.6B/98.2B), good spread demonstrating annotation
+          patterns'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company market performance is a neutral, realistic business
+          scenario perfectly suited for this plot type
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue 4.5-25.6B and market cap 18.9-98.2B are realistic for tech
+          companies
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: numpy, bokeh.io.export_png, bokeh.models.ColumnDataSource/LabelSet,
+          bokeh.plotting.figure'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses 'p.scatter()', which was first flagged as deprecated but is
+          actually fine in current Bokeh
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource and LabelSet for annotations, but doesn't
+          leverage Bokeh's interactive hover tooltips which would be a distinctive
+          feature
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/highcharts.yaml b/plots/scatter-annotated/metadata/highcharts.yaml
index 8aecd0be95..dc21e7ffe2 100644
--- a/plots/scatter-annotated/metadata/highcharts.yaml
+++ b/plots/scatter-annotated/metadata/highcharts.yaml
@@ -27,3 +27,180 @@ review:
   - Grid lines could be slightly more visible (alpha 0.15 is quite faint)
   - Some labels could benefit from smart positioning (e.g., labels near axis edges
     like DataFlow appear slightly cramped)
+  image_description: The plot displays a scatter chart with 15 data points representing
+    tech companies. Each point is a semi-transparent blue circle (rgba blue ~#306998
+    with alpha 0.7) with company name labels positioned above each point. The title
+    "scatter-annotated · highcharts · pyplots.ai" appears at the top in bold. The
+    X-axis shows "Annual Revenue ($ millions)" ranging from 0 to 500+, and the Y-axis
+    shows "Year-over-Year Growth (%)" ranging from approximately -14 to 96. Companies
+    like PrimeData (high growth ~89%), SyncLabs (~67%), and QuantumIO (negative growth
+    ~-8%) are clearly labeled. The background is white with subtle gray grid lines.
+    Labels have white text outlines for readability.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Data labels
+          are legible with good text outline. Slightly smaller than optimal for 4800px
+          canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: 'No overlapping text thanks to `allowOverlap: false` setting. All
+          labels are fully readable.'
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (radius 20) with good alpha (0.7). Could be
+          slightly larger for this data density (15 points).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe. No problematic color
+          combinations.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins and plot fills canvas well. Minor asymmetry with some
+          empty space in upper-left quadrant.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Annual Revenue ($ millions)" and "Year-over-Year
+          Growth (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.15). Legend disabled which is appropriate
+          for single series, but grid could be slightly more visible.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped (revenue vs growth)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Text annotations present via dataLabels, points have alpha transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variety: high performers (PrimeData), low performers (QuantumIO
+          with negative growth), mid-range companies. Good spread across revenue range.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech companies with revenue and growth metrics is a realistic, neutral
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue 50-500M and growth -10% to 90% are plausible. Some growth
+          values (89%) are on the high end but acceptable for tech.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses raw dict config instead of highcharts-core Python library (not
+          deprecated, but doesn't follow library rules pattern)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts dataLabels feature for annotations with text outline.
+          Could leverage more Highcharts-specific features like hover tooltips configuration
+          or animation settings.
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/letsplot.yaml b/plots/scatter-annotated/metadata/letsplot.yaml
index b081d3f439..24c531828e 100644
--- a/plots/scatter-annotated/metadata/letsplot.yaml
+++ b/plots/scatter-annotated/metadata/letsplot.yaml
@@ -22,3 +22,161 @@ review:
   weaknesses:
   - Some label pairs are close together (ByteWorks/SmartScale, DataSys/LogiCore/CloudNet
     region) - could benefit from adjustText-like positioning if available in lets-plot
+  image_description: The plot displays a scatter chart showing 15 company names plotted
+    by Annual Revenue (x-axis, in $ millions, ranging from ~60 to ~540) against Year-over-Year
+    Growth (y-axis, in %, ranging from 0 to 55). Each data point is a blue (#306998)
+    circle with alpha=0.7, and company names appear as dark gray text labels positioned
+    above each point. The title "scatter-annotated · lets-plot · pyplots.ai" appears
+    at the top. Notable outliers include QuantumAI (low revenue, high growth ~48%),
+    NeuraTech (high revenue ~480M, high growth ~42%), and MegaSoft (highest revenue
+    ~520M, low growth ~8%). The plot uses a minimal theme with subtle gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 4
+        max: 8
+        passed: true
+        comment: some labels are close together (ByteWorks/SmartScale, DataSys/LogiCore
+          area) though still readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: markers well-sized with appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: single blue color, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: good proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive with units ("Annual Revenue ($ millions)", "Year-over-Year
+          Growth (%)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct scatter plot with text annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=revenue, Y=growth, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: text labels present for all points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'shows variety: high/low revenue, high/low growth, outliers'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: plausible business scenario (company revenue vs growth)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: sensible values for revenue (50-520M) and growth (5-48%)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/matplotlib.yaml b/plots/scatter-annotated/metadata/matplotlib.yaml
index 3288285228..535a75537e 100644
--- a/plots/scatter-annotated/metadata/matplotlib.yaml
+++ b/plots/scatter-annotated/metadata/matplotlib.yaml
@@ -21,3 +21,180 @@ review:
   weaknesses:
   - Does not use adjustText or similar library for automatic label placement as suggested
     in spec (though manual approach works well for this dataset)
+  image_description: 'The plot displays a scatter chart with 15 data points representing
+    technology companies. Each point is rendered as a moderately large circle (~s=250)
+    in Python Blue (#306998) with white edge outlines and 0.7 alpha transparency.
+    The X-axis shows "Market Capitalization ($ Billions)" ranging from 0 to 240, while
+    the Y-axis displays "Annual Revenue ($ Billions)" from 0 to 70. Each data point
+    has a company name label (e.g., "QuantumAI", "ByteWorks", "TechCorp") connected
+    by subtle gray connector lines. The title follows the required format: "scatter-annotated
+    · matplotlib · pyplots.ai". A dashed gray grid with 0.3 alpha provides reference
+    lines. The layout is 16:9 landscape format with good margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, annotations
+          at 13pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All annotations are carefully offset with custom positioning to avoid
+          overlap; no text collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size s=250 is appropriate for 15 points; alpha=0.7 allows
+          visibility of any overlap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single Python Blue color with white edge; no color-based differentiation
+          needed
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills ~60% of area, slight imbalance with
+          more whitespace on left side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Market Capitalization
+          ($ Billions)" and "Annual Revenue ($ Billions)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate (alpha=0.3), but no legend present; however,
+          this plot type doesn't require a legend since data is annotated directly
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct annotated scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (market cap vs revenue)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter points, text labels, connecting
+          lines, alpha transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding (0-240 and 0-70)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (annotations serve as identification)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows variety: small companies (ByteWorks), large ones (QuantumAI),
+          spread across the range demonstrating correlation'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company market cap vs revenue is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for tech companies (market cap 18-225B, revenue
+          4-65B)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → annotations → styling
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set (though data is actually deterministic
+          arrays)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current ax.annotate API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic annotate() method; could have used adjustText library
+          or more advanced annotation features, but the manual offset approach is
+          acceptable for the data size
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/plotly.yaml b/plots/scatter-annotated/metadata/plotly.yaml
index 2938f1e1d5..91fc71c1c3 100644
--- a/plots/scatter-annotated/metadata/plotly.yaml
+++ b/plots/scatter-annotated/metadata/plotly.yaml
@@ -24,3 +24,185 @@ review:
   - Does not fully leverage plotly interactive capabilities as a distinctive feature
     (hover is secondary to static annotations)
   - Grid opacity (0.1) is too subtle and could be slightly more visible (0.2-0.3 recommended)
+  image_description: The plot displays a scatter chart of 12 tech companies comparing
+    Market Cap (x-axis, in Billion USD) vs Annual Revenue (y-axis, in Billion USD).
+    Each blue circular marker represents a company, with connecting arrows pointing
+    to white-background annotation boxes containing company names in bold text. The
+    title "scatter-annotated · plotly · pyplots.ai" appears at the top center. Companies
+    shown include Amazon (highest revenue ~520B), Apple (highest market cap ~2800B),
+    Microsoft, Alphabet, Meta, Tesla, Nvidia, Samsung, TSMC, Oracle, Salesforce, and
+    Netflix. The plot uses a clean white template with subtle grid lines, and annotations
+    are well-positioned with minimal overlap.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, axis labels are well-sized (24pt), tick
+          fonts are readable (18pt), annotations at 18pt are perfectly legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All annotations are manually positioned with custom offsets to avoid
+          overlap; each label is clearly readable without collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers size=20 with opacity=0.7 are well-sized for 12 points; white
+          border provides good contrast. Slightly smaller than optimal for this data
+          density.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe; good contrast against
+          white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space; plot fills ~60% of canvas; margins are
+          balanced. Minor: bottom-left area has some label clustering'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Market Cap (Billion USD)" and "Annual Revenue
+          (Billion USD)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), no legend needed for this plot type.
+          Grid could be slightly more visible.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=market cap (numeric), Y=revenue (numeric), labels=company names
+          (string) - all correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: text annotations, connecting arrows from
+          labels to points, moderate transparency (0.7), appropriate font size'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding (range extends beyond
+          min/max values)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variety of annotation positions, different data point distributions.
+          Good spread across quadrants. Minor: could show more extreme outliers.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real tech companies with plausible market cap and revenue values;
+          neutral business topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Values are realistic for 2024 tech companies. Minor: Nvidia''s low
+          revenue relative to market cap is accurate, but some values are slightly dated.'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → figure → trace → annotations →
+          layout → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set (though not strictly needed since data
+          is hardcoded)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses graph_objects annotations with arrows, but doesn't leverage
+          plotly's interactive hover (disabled by using static mode primarily). Hover
+          template is defined but the HTML export is secondary. Could better showcase
+          plotly's interactivity as a core feature.
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/plotnine.yaml b/plots/scatter-annotated/metadata/plotnine.yaml
index f55404eaaa..1789f71643 100644
--- a/plots/scatter-annotated/metadata/plotnine.yaml
+++ b/plots/scatter-annotated/metadata/plotnine.yaml
@@ -22,3 +22,172 @@ review:
   - Grid lines could be more subtle (add alpha parameter if possible)
   - Point size could be slightly larger for only 10 data points (size=8-10 would be
     more visible)
+  image_description: 'The plot displays a scatter chart showing 10 technology companies
+    plotted by Annual Revenue ($ Millions) on the x-axis and Profit Margin (%) on
+    the y-axis. Each blue circular data point (with alpha transparency ~0.7) has its
+    company name label positioned directly above it in dark gray text. Companies range
+    from TechCorp (lowest: ~$35M revenue, ~6% margin) to AI Labs (highest: ~$200M
+    revenue, ~23% margin). The plot uses a clean minimal theme with subtle gray grid
+    lines. The title "scatter-annotated · plotnine · pyplots.ai" appears at the top.
+    All text is clearly legible with appropriate font sizes.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and annotations all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; data points strategically spread to avoid label
+          collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points sized well (size=6), alpha=0.7 appropriate; could be slightly
+          larger for 10 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight extra whitespace on left side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Annual Revenue ($ Millions)", "Profit Margin
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: annotated scatter plot'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped (revenue vs profit margin)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: scatter points, text labels, appropriate alpha'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with proper limits set
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good spread of companies across revenue/margin spectrum; could
+          show more outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Realistic business scenario: company revenue vs profit margin analysis'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for mid-sized tech companies; revenue ranges
+          sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (aes, geom_point, geom_text, theme_minimal) correctly;
+          could leverage scale_color_brewer or other plotnine-specific features
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/pygal.yaml b/plots/scatter-annotated/metadata/pygal.yaml
index d0276a4fd1..3996450319 100644
--- a/plots/scatter-annotated/metadata/pygal.yaml
+++ b/plots/scatter-annotated/metadata/pygal.yaml
@@ -25,3 +25,175 @@ review:
   - Legend at bottom is redundant with on-chart annotations and appears very small/hard
     to read
   - Some color pairs are similar (two purples, two greens) which could cause confusion
+  image_description: The plot displays a scatter chart with 12 tech company data points
+    plotted on a white background. The X-axis shows "Market Cap (Billion $)" ranging
+    from 0 to 160, and the Y-axis shows "Annual Revenue (Billion $)" ranging from
+    0 to ~65. Each company is represented by a distinctly colored dot with its name
+    annotated directly next to the point. Companies include TechFlow (blue, bottom-left),
+    DataPrime (yellow), ByteLogic (light blue), CyberLink (green), CloudNine (red),
+    SoftEdge (orange), DevStack (teal), DigiTech (purple), NetWave (green), AppForge
+    (dark gray), CodeSphere (purple), and WebCore (gold, top-right). The title "scatter-annotated
+    · pygal · pyplots.ai" appears at the top center. A legend at the bottom lists
+    all 12 companies in 6 columns. Grid lines are visible in light gray. The annotations
+    are clearly readable and well-positioned next to each point.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and annotations are readable; tick labels slightly
+          small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, data points well-spaced, annotations clearly
+          positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers appropriately sized for 12 data points, good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good variety of colors, though some similar shades (two purples,
+          two greens)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills appropriate space, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Market Cap (Billion $)", "Annual Revenue
+          (Billion $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend at bottom is very small and hard to read; redundant with annotations
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with annotations
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (market cap vs revenue)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter points, text labels/annotations'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety of company sizes; could have more outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company market cap vs revenue is a realistic, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for tech companies; some correlation visible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (HTML output not an issue but seed
+          import unused)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart with individual series for colors, custom Style,
+          print_values for annotations; could leverage tooltips more
+  verdict: APPROVED
diff --git a/plots/scatter-annotated/metadata/seaborn.yaml b/plots/scatter-annotated/metadata/seaborn.yaml
index baaa696ee6..4c7ee7c2d1 100644
--- a/plots/scatter-annotated/metadata/seaborn.yaml
+++ b/plots/scatter-annotated/metadata/seaborn.yaml
@@ -23,3 +23,178 @@ review:
   weaknesses:
   - Grid styling could be more subtle (consider reducing grid alpha or using a lighter
     style)
+  image_description: The plot displays a scatter visualization of 15 technology companies
+    with Annual Revenue ($ Billion) on the X-axis (0-80) and Market Capitalization
+    ($ Billion) on the Y-axis (~25-350). Each blue data point (with white edge) represents
+    a company and has a text label annotation connected by subtle gray lines. The
+    labels include company names like VirtualAI, CloudNet, DataSys, QuantumX, CodeBase,
+    AILabs, DevOps, TechCorp, NetFlow, SecureIT, CyberSec, SmartHub, ByteLogic, AppWorks,
+    and StreamIO. The title follows the correct format "scatter-annotated · seaborn
+    · pyplots.ai". The plot uses a clean whitegrid background with subtle blue gridlines.
+    Labels are well-distributed using adjustText, with minimal overlap visible.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: adjustText handles most overlaps well, very minor proximity between
+          some labels (e.g., AILabs/DevOps area)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers at s=200 with alpha=0.7 are perfectly sized for 15 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind palette, good contrast with blue markers
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, slight excess whitespace on left side due to x-axis
+          starting at 0
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Annual Revenue ($ Billion)" and
+          "Market Capitalization ($ Billion)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed (single color), but grid could be more subtle (currently
+          default whitegrid)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct annotated scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (revenue vs market cap)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: text labels, connecting lines from labels
+          to points, alpha transparency on points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-annotated · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good variation in both dimensions, positive correlation visible,
+          some clustering that tests label adjustment
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech companies with revenue/market cap is a realistic, neutral business
+          scenario matching spec application examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for large tech companies ($5-80B revenue, ~$30-330B
+          market cap), though some market cap multiples are quite high
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn, adjustText)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using explicit color instead of hue for single-color plot triggers
+          a minor style note, but functional
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn scatterplot with DataFrame and set_theme, but the annotation
+          work is done via matplotlib text and adjustText rather than seaborn-specific
+          features
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/altair.yaml b/plots/scatter-basic/metadata/altair.yaml
index f36dd409a4..e063a0f0b0 100644
--- a/plots/scatter-basic/metadata/altair.yaml
+++ b/plots/scatter-basic/metadata/altair.yaml
@@ -23,3 +23,169 @@ review:
     ~4.5) - consider using scale domain or padding
   - Axis labels are generic (X Value, Y Value) - could use a realistic scenario from
     spec applications (e.g., Study Hours, Exam Score)
+  image_description: The plot displays a scatter plot with approximately 100 data
+    points in a blue color (#306998). The title "scatter-basic · altair · pyplots.ai"
+    is shown at the top center. The X-axis is labeled "X Value" ranging from 0 to
+    14, and the Y-axis is labeled "Y Value" ranging from 0 to 16. Data points are
+    concentrated in the range of x=5-14 and y=3-14, showing a clear positive linear
+    correlation. Points have moderate transparency (opacity 0.7) and are filled circles
+    of consistent size. A subtle grid (alpha 0.3) helps with value estimation. The
+    overall layout is clean with a 16:9 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at fontSize 28, labels at 22, ticks at 18 - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are visible with good size (200) and opacity (0.7), slightly
+          large for 100 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though left side has excessive whitespace (X starts
+          at 0 while data starts ~4.5)
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Value" / "Y Value" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at 0.3 opacity, no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: transparency, axis labels, title, grid'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series scatter
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive correlation well, but could show more variation (outliers,
+          clusters)
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic x/y values are plausible but not tied to a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are reasonable numeric range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses declarative encoding, tooltips, and configure methods, but could
+          use more Altair-specific features like interactive selection
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/highcharts.yaml b/plots/scatter-basic/metadata/highcharts.yaml
index 64ea2912f8..8467d3a797 100644
--- a/plots/scatter-basic/metadata/highcharts.yaml
+++ b/plots/scatter-basic/metadata/highcharts.yaml
@@ -28,3 +28,171 @@ review:
     points
   - Does not leverage Highcharts tooltip/hover interactivity features visible in the
     static image
+  image_description: The plot displays a scatter plot on a white background with approximately
+    100 data points. The points are rendered as filled circles in a muted blue color
+    (Python Blue) with transparency (alpha ~0.7). The title "scatter-basic · highcharts
+    · pyplots.ai" appears at the top in bold black text. The X-axis is labeled "X
+    Value" (ranging from ~4.8 to ~13.6) and the Y-axis is labeled "Y Value" (ranging
+    from ~2 to ~15). Both axes have subtle gray dashed grid lines. The data shows
+    a clear positive correlation pattern with some scatter/noise. The overall layout
+    is clean and professional with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized for 100 data points with good transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (Python Blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though slight extra whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Value", "Y Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed style and appropriate alpha; legend disabled
+          (appropriate for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: transparency, axis labels, title, grid
+          lines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for single-series scatter
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlation pattern with noise, demonstrates typical scatter
+          behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible data range, shows meaningful positive correlation
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible numeric values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses highcharts-core properly with ScatterSeries, but doesn't leverage
+          advanced Highcharts features like tooltips or interactive hover states in
+          the static output
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/letsplot.yaml b/plots/scatter-basic/metadata/letsplot.yaml
index ee78c4af77..a6814d2e62 100644
--- a/plots/scatter-basic/metadata/letsplot.yaml
+++ b/plots/scatter-basic/metadata/letsplot.yaml
@@ -23,3 +23,166 @@ review:
   - Data labels use generic X Value/Y Value instead of a realistic scenario (e.g.,
     Study Hours vs Exam Score)
   - Title text could be slightly larger for better visibility at full resolution
+  image_description: The plot displays a 2D scatter chart with approximately 150 data
+    points in a muted blue color (#306998) with transparency (alpha 0.7). The title
+    "scatter-basic · lets-plot · pyplots.ai" appears at the top-left in gray text.
+    The X-axis is labeled "X Value" ranging from approximately 4.5 to 15, and the
+    Y-axis is labeled "Y Value" ranging from approximately 1 to 18. The points show
+    a clear positive linear correlation with moderate scatter/noise around the trend.
+    A subtle dashed gray grid provides reference lines. The plot uses a 16:9 aspect
+    ratio with a clean minimal theme and white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: all text readable, font sizes appropriate but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: markers well-sized for 150 points with good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: single blue color, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: good 16:9 proportions, data fills space well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: descriptive labels but missing units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle dashed grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct 2D scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: transparency, axis labels, title, grid all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series scatter
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses "scatter-basic · lets-plot · pyplots.ai" format
+    data_quality:
+      score: 15
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows positive correlation with scatter noise
+      - id: DQ-02
+        name: Realistic Context
+        score: 3
+        max: 7
+        passed: false
+        comment: generic "X Value"/"Y Value" lacks real-world context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: sensible numeric ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: ggplot grammar, theme_minimal(), element_text customization, ggsize(),
+          exports PNG with scale and interactive HTML
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/matplotlib.yaml b/plots/scatter-basic/metadata/matplotlib.yaml
index 34787de3a2..45f47a8a18 100644
--- a/plots/scatter-basic/metadata/matplotlib.yaml
+++ b/plots/scatter-basic/metadata/matplotlib.yaml
@@ -25,3 +25,176 @@ review:
   - Does not leverage distinctive matplotlib features (e.g., annotations, colorbar,
     secondary elements)
   - Marker size s=180 slightly exceeds guidelines for this data density
+  image_description: 'The plot displays a 2D scatter plot with a clear positive correlation
+    between study hours (x-axis, ranging from 1 to 12 hours per week) and exam scores
+    (y-axis, ranging from approximately 45% to 100%). The data points are rendered
+    as medium blue circles (#306998) with white edge outlines, using appropriate transparency
+    (alpha=0.7) that reveals overlapping points. The title "scatter-basic · matplotlib
+    · pyplots.ai" appears at the top in large text. Axis labels are clear: "Study
+    Hours (per week)" on x-axis and "Exam Score (%)" on y-axis. A subtle dashed grid
+    with low opacity helps with value estimation. The 16:9 landscape layout is well-balanced
+    with good use of the plotting area. There are approximately 120 data points showing
+    natural scatter around a linear trend, with some clustering visible at the score
+    ceiling (100%).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers s=180 appropriate for 120 points, alpha=0.7 reveals overlaps
+          well, though slightly large per guidelines (100-200 recommended for 30-100
+          pts)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, Python blue (#306998) with white edges, good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 canvas, good margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Study Hours (per week)" and "Exam Score
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3, dashed), but no legend present - not required
+          for single-series scatter
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=independent variable (study hours), Y=dependent variable (exam
+          scores)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Transparency (alpha=0.7), axis labels, title, grid lines all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 120 points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlation pattern, scatter/noise, ceiling effect at 100%,
+          range of values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a classic, relatable educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Hours 1-12 per week and scores 0-100% are realistic ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures deterministic results
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current ax.scatter() syntax
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic scatter usage without advanced matplotlib features like edgecolors
+          pattern fills, annotations, or secondary axes
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/plotly.yaml b/plots/scatter-basic/metadata/plotly.yaml
index 092a46b229..4343316cc8 100644
--- a/plots/scatter-basic/metadata/plotly.yaml
+++ b/plots/scatter-basic/metadata/plotly.yaml
@@ -26,3 +26,172 @@ review:
   - Does not fully leverage Plotly distinctive interactive features beyond basic hover
   - Data could include a few more obvious outliers to better demonstrate scatter plot
     use cases
+  image_description: The plot displays a scatter plot with 100 data points showing
+    the relationship between Study Hours (x-axis, ranging from 1-10) and Exam Score
+    in percentage (y-axis, ranging from ~40-100). Points are rendered in a muted blue
+    color (#306998) with moderate transparency (0.7 opacity). The title "scatter-basic
+    · plotly · pyplots.ai" is centered at the top. Both axes have clear labels with
+    units ("Study Hours (h)" and "Exam Score (%)"). The background is clean white
+    with subtle gray gridlines. The data shows a clear positive correlation - as study
+    hours increase, exam scores tend to increase, with natural scatter/noise around
+    the trend.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Marker size 16 with 100 points is appropriate, though slightly on
+          the larger side for this density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned margins, good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Study Hours (h)" and "Exam Score
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (independent → dependent)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Transparency (0.7), axis labels, title, grid lines all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, correctly omitted
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive correlation with scatter, but could benefit from some
+          outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a perfect educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Hours 1-10 and scores 40-100 are realistic, though the linear relationship
+          is slightly too clean
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png at correct resolution
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Has hovertemplate which is good, but doesn't generate HTML output
+          or leverage more advanced Plotly interactivity features
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/plotnine.yaml b/plots/scatter-basic/metadata/plotnine.yaml
index 0fd6d1a69e..c2b4c7976e 100644
--- a/plots/scatter-basic/metadata/plotnine.yaml
+++ b/plots/scatter-basic/metadata/plotnine.yaml
@@ -25,3 +25,174 @@ review:
   - Does not utilize color or size aesthetics to demonstrate additional plotnine capabilities
   - Grid configuration uses element_line with alpha parameter which may not fully
     render as expected in all plotnine versions
+  image_description: The plot displays a 2D scatter plot with "Study Hours (per week)"
+    on the x-axis (ranging from ~1 to 10) and "Exam Score (points)" on the y-axis
+    (ranging from ~20 to 106). The data points are rendered as blue circles (#306998)
+    with moderate transparency (alpha=0.7). The title "scatter-basic · plotnine ·
+    pyplots.ai" appears at the top in a clear font. The plot uses a minimal theme
+    with subtle gray grid lines. The 150 data points show a clear positive correlation
+    between study hours and exam scores, with visible scatter/noise around the trend
+    line. The layout is clean with good proportions in 16:9 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized for 150 points with appropriate alpha, minor deduction
+          as markers could be slightly larger for this density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 16:9 aspect ratio with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Study Hours (per week)" and "Exam
+          Score (points)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is very subtle (alpha=0.3 for major, 0.2 for minor), but no
+          legend needed for single-series scatter
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (study hours → x, exam scores → y)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has transparency, grid lines, axis labels, title as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive correlation and scatter well, but doesn't demonstrate
+          outliers explicitly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: '"Study hours vs exam scores" is a real, comprehensible educational
+          scenario'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Realistic values: 1-10 hours/week, scores ~20-106 (plausible exam
+          range)'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports present
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics (ggplot + geom_point + theme_minimal),
+          but doesn't leverage advanced features like faceting, stats, or aesthetic
+          mappings for color/size
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/pygal.yaml b/plots/scatter-basic/metadata/pygal.yaml
index 1e989231de..2dc5c42100 100644
--- a/plots/scatter-basic/metadata/pygal.yaml
+++ b/plots/scatter-basic/metadata/pygal.yaml
@@ -23,3 +23,175 @@ review:
   - Axis labels lack units (generic X Value, Y Value)
   - Dot size could be slightly larger for better visibility at 100 data points
   - Data is generic random correlation rather than a realistic scenario
+  image_description: The plot displays a scatter visualization with approximately
+    100 blue data points (#306998 Python Blue) on a clean white background. The title
+    "scatter-basic · pygal · pyplots.ai" appears at the top in a readable font. The
+    X-axis is labeled "X Value" (range ~5-13) and Y-axis is labeled "Y Value" (range
+    ~3-14). Data points show a clear positive correlation pattern with appropriate
+    scatter/noise. Subtle dashed grid lines are present on both axes. The dots have
+    good visibility with apparent transparency for overlapping points. Layout uses
+    16:9 aspect ratio (4800×2700).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable. Font
+          sizes are well-calibrated for the 4800×2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. All labels are clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots are visible and appropriately sized (dots_size=12). For 100
+          points, this is reasonable though slightly on the smaller side.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue) with good contrast against white background.
+          No colorblind issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions overall, though there's slightly more whitespace
+          on the left side due to data distribution.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Value", "Y Value") but lack units.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines, no legend needed for single series
+          (correctly hidden).
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter chart type using pygal.XY with stroke=False.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly assigned to horizontal and vertical axes.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: transparency (opacity=0.7), axis labels,
+          title, grid lines.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate margins.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly hidden for single series.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive correlation with noise, demonstrates typical scatter
+          patterns. Could show more variation (outliers, different density regions).
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible correlated data, though generic (random with linear relationship
+          + noise).
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible numeric ranges.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal.XY for scatter, custom Style configuration, SVG-native
+          output with PNG export. Could leverage more pygal-specific features like
+          tooltips or value formatting.
+  verdict: APPROVED
diff --git a/plots/scatter-basic/metadata/seaborn.yaml b/plots/scatter-basic/metadata/seaborn.yaml
index c913100fe2..920cce6ac0 100644
--- a/plots/scatter-basic/metadata/seaborn.yaml
+++ b/plots/scatter-basic/metadata/seaborn.yaml
@@ -24,3 +24,175 @@ review:
   - Does not leverage seaborn distinctive statistical features (e.g., regplot, jointplot,
     or hue parameter for additional dimension)
   - Marker size s=200 is at the upper end of the recommended range for 150 points
+  image_description: The plot displays a 2D scatter plot with 150 blue circular markers
+    (#306998 color) with white edge borders. The X-axis shows "Study Hours (per week)"
+    ranging from approximately 1 to 10, and the Y-axis shows "Exam Score (points)"
+    ranging from about 10 to 120. The title correctly follows the format "scatter-basic
+    · seaborn · pyplots.ai". The data demonstrates a clear positive linear correlation
+    between study hours and exam scores with realistic noise/variance. Points have
+    moderate transparency (alpha 0.7) allowing overlapping points to be distinguished.
+    Subtle dashed grid lines (alpha 0.3) aid in value estimation. The layout is well-balanced
+    with proper use of the 16:9 aspect ratio.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers s=200 with alpha 0.7 is slightly large for 150 points (guideline
+          suggests 100-200 for 100-300 points), but still visible and distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Perfect 16:9 layout, good proportions, no cut-off
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Study Hours (per week)", "Exam Score (points)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle at alpha 0.3 with dashed style (good), but no legend
+          present (not needed for single-series scatter, but spec doesn't explicitly
+          exclude it)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 2D scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=study hours (independent), Y=exam scores (dependent) correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: transparency (alpha 0.7), axis labels,
+          title, grid lines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 150 data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series, N/A gives full points
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlation, scatter/noise, potential outliers (low score ~10
+          at ~1.5 hours), demonstrates typical scatter patterns as specified
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a real, comprehensible scenario directly
+          from spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Study hours 1-10 per week and exam scores ~10-120 are realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot which is basic seaborn. Could have used seaborn-specific
+          features like regplot for regression line, or leveraged seaborn's statistical
+          capabilities. Currently equivalent to matplotlib with seaborn styling.
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/altair.yaml b/plots/scatter-categorical/metadata/altair.yaml
index 7a8c077959..ae2fe6581d 100644
--- a/plots/scatter-categorical/metadata/altair.yaml
+++ b/plots/scatter-categorical/metadata/altair.yaml
@@ -18,4 +18,15 @@ review:
   - Excellent font sizing across all text elements (title, axes, legend)
   - Good use of Altair declarative features including tooltips for interactivity
   - Clean code structure following KISS principles
-  weaknesses: []
+  weaknesses:
+  - None - implementation meets all quality criteria
+  image_description: The plot displays a categorical scatter plot showing Iris-like
+    petal measurements for three plant species. Setosa (blue circles) clusters tightly
+    in the bottom-left corner with petal lengths ~0.9-2.0 cm and widths ~0-0.4 cm.
+    Versicolor (yellow squares) occupies the middle region with petal lengths ~3.0-5.5
+    cm and widths ~0.9-2.0 cm. Virginica (olive-green triangles) extends to the upper-right
+    with petal lengths ~4.5-8.0 cm and widths ~1.0-2.7 cm. The three species form
+    well-separated clusters showing clear categorical distinction. Title reads "scatter-categorical
+    · altair · pyplots.ai" and axis labels include units (cm). Legend is positioned
+    in the upper-right corner with appropriately sized symbols.
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/bokeh.yaml b/plots/scatter-categorical/metadata/bokeh.yaml
index c449935461..4b52fd11df 100644
--- a/plots/scatter-categorical/metadata/bokeh.yaml
+++ b/plots/scatter-categorical/metadata/bokeh.yaml
@@ -27,3 +27,188 @@ review:
     points between categories
   - Library features usage is good but not exceptional (could use linked selections
     or custom hover styling)
+  image_description: 'The plot displays a categorical scatter plot with three distinct
+    product categories (Product A, Product B, Product C) shown in three different
+    colors. Product A (blue, #306998) forms a cluster in the lower-left region with
+    Marketing Spend around 15-35 $K and Customer Engagement Scores around 15-40. Product
+    B (yellow, #FFD43B) occupies the upper-middle area with Marketing Spend around
+    35-65 $K and engagement scores of 55-90. Product C (red/coral, #E74C3C) is positioned
+    on the right side with Marketing Spend around 65-100 $K and engagement scores
+    of 35-65. The title "scatter-categorical · bokeh · pyplots.ai" appears at the
+    top. Axis labels are clear: "Marketing Spend ($K)" on x-axis and "Customer Engagement
+    Score" on y-axis. A legend in the top-right corner identifies each category. The
+    background is a subtle off-white (#fafafa) with dashed grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          4800x2700 resolution with appropriately large font sizes (48pt title, 36pt
+          axis labels, 28pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels, ticks, and legend are well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (size=25) with good alpha (0.7) for 150 points;
+          white outline helps distinguish overlapping points. Slightly smaller than
+          optimal for this point count.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are colorblind-safe with good luminance contrast
+          between categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; plot area fills most of the image with
+          balanced margins. Legend placement is good but could be slightly better
+          integrated.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Marketing Spend ($K)" and "Customer
+          Engagement Score"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed). However, the legend appears outside
+          the plot area in the margin with small bokeh toolbar icons visible, which
+          is slightly distracting.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with categorical coloring
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y continuous variables correctly mapped, category determines
+          color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors per category, legend
+          for identification, alpha transparency for overlapping points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to Product A, B, C
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-categorical · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters demonstrating categorical grouping
+          well. Could show more overlap between categories to demonstrate the utility
+          of categorical coloring in ambiguous regions.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Marketing spend vs customer engagement is a plausible, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (marketing spend in $K, engagement scores 0-100),
+          though some engagement scores could be slightly higher to use more of the
+          y-axis range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, hover tooltips, interactive tools (pan, wheel_zoom,
+          box_zoom, reset, save), and exports both PNG and HTML. Good use of Bokeh's
+          interactivity, but could leverage more advanced features like linked brushing
+          or custom callbacks.
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/highcharts.yaml b/plots/scatter-categorical/metadata/highcharts.yaml
index 4083d9a887..58986e69c0 100644
--- a/plots/scatter-categorical/metadata/highcharts.yaml
+++ b/plots/scatter-categorical/metadata/highcharts.yaml
@@ -24,3 +24,183 @@ review:
     at smaller size, causing potential sizing issues
   - No alpha transparency on markers despite spec suggestion for overlapping points
   - Legend positioned at fixed pixel offset rather than responsive positioning
+  image_description: 'The plot displays a categorical scatter plot showing the relationship
+    between Nitrogen Applied (kg/ha) on the x-axis and Plant Growth (cm) on the y-axis.
+    Three fertilizer types are distinguished by color: Fertilizer A (blue #306998),
+    Fertilizer B (yellow #FFD43B), and Fertilizer C (purple #9467BD). The title "scatter-categorical
+    · highcharts · pyplots.ai" is prominently displayed at the top center. A vertical
+    legend in the top-right corner identifies each category. The plot shows clear
+    differentiation between groups - Fertilizer B (yellow) shows the highest growth
+    values, Fertilizer A (blue) shows moderate growth, and Fertilizer C (purple) shows
+    lower, more scattered values. Grid lines are subtle and do not distract from the
+    data. Markers are circular with white borders and appropriately sized for the
+    data density (~120 points total).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 48px, axis labels at 36px, tick labels at 28px - all very
+          readable. Slightly oversized for the canvas but clear.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers with radius 12 are well-sized for ~120 points, white borders
+          add distinction. Minor overlap in dense regions but acceptable.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Purple palette is colorblind-safe (no red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Plot fills most of the canvas with balanced margins. Bottom margin
+          (300px) creates some extra whitespace.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Nitrogen Applied (kg/ha)" and "Plant
+          Growth (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle (rgba(0,0,0,0.1)), legend well-positioned in top-right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Nitrogen (continuous), Y=Growth (continuous), Color=Fertilizer
+          type (categorical) - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors per category, legend
+          for identification'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data series names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-categorical · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct categories with different correlation patterns.
+          Fertilizer B shows steeper slope and higher values, C shows more variance.
+          Good variety but could show more extreme outliers.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth study with fertilizer types is a real, neutral scientific
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Nitrogen 15-85 kg/ha and growth 0-70 cm are plausible, though some
+          growth values could be slightly improved (growth values starting near 0
+          would be more realistic).
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes, follows imports → data → plot → save pattern
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" and also creates plot.html (acceptable), but the
+          HTML f-string is reused incorrectly - the `interactive_html` variable
+          contains `html_str`, which uses the original chart dimensions
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts scatter series with proper options, hover states,
+          and generates both PNG and HTML output. Could leverage more Highcharts features
+          like tooltips with custom formatting.
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/matplotlib.yaml b/plots/scatter-categorical/metadata/matplotlib.yaml
index 40c4461a3b..f4d30b3296 100644
--- a/plots/scatter-categorical/metadata/matplotlib.yaml
+++ b/plots/scatter-categorical/metadata/matplotlib.yaml
@@ -23,3 +23,183 @@ review:
   - Library features could be more distinctive - matplotlib offers patheffects, custom
     marker paths, or gradient fills that are not utilized
   - Yellow markers with white background could have slightly better contrast
+  image_description: The plot shows a categorical scatter plot with three distinct
+    species groups displayed using different colors and marker shapes. Species A (blue
+    circles) appears in the lower-left cluster with petal lengths ~1-2 cm and widths
+    ~0.1-0.5 cm. Species B (yellow/gold squares) occupies the middle region with petal
+    lengths ~3-5 cm and widths ~1-1.8 cm. Species C (teal triangles) forms the upper-right
+    cluster with petal lengths ~4.5-7 cm and widths ~1.4-3.1 cm. The title reads "scatter-categorical
+    · matplotlib · pyplots.ai" at the top. Axis labels show "Petal Length (cm)" and
+    "Petal Width (cm)" with units. A legend in the upper-left clearly identifies the
+    three species. Grid lines are subtle with dashed style. The overall layout is
+    well-balanced with good use of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, legend at
+          16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size s=200 is appropriate for 120 points (40 per group), alpha=0.8
+          provides good visibility with slight transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998), yellow (#FFD43B), and teal (#2AA198) are distinguishable
+          and avoid red-green confusion. Different marker shapes (circle, square,
+          triangle) provide additional distinction. Minor deduction as yellow could
+          be slightly challenging against white background for some viewers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+          in upper-left without overlapping data
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Petal Length (cm)"
+          and "Petal Width (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (alpha=0.3, dashed), legend well placed with framealpha=0.9.
+          Minor deduction: legend could have slightly more contrast/padding'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical scatter plot with distinct colors per category
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly assigned to continuous numeric variables, color
+          mapped to category
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors per category, legend included, varying marker shapes
+          for additional distinction, alpha transparency for overlaps
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors/shapes to Species A, B, C
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "scatter-categorical · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows three distinct categories with clear separation, demonstrates
+          correlation within groups. Minor deduction: could show overlapping groups
+          to demonstrate how colors distinguish overlapping data'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris-like petal measurements scenario is a classic, neutral, scientific
+          example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Petal measurements are realistic (1-7 cm length, 0-3 cm width).
+          Minor deduction: the gap between Species A and B/C is quite large (1-2cm
+          to 3-5cm jump)'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses standard scatter() with colors and markers. Could leverage matplotlib-specific
+          features like patheffects, custom markers, or colormaps more distinctively
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/plotly.yaml b/plots/scatter-categorical/metadata/plotly.yaml
index f96962a98d..00c800a2f4 100644
--- a/plots/scatter-categorical/metadata/plotly.yaml
+++ b/plots/scatter-categorical/metadata/plotly.yaml
@@ -24,3 +24,182 @@ review:
     without being distracting
   - Legend border is minimal but could be removed entirely for cleaner look
   - Some sales growth values exceed 100% which is mathematically possible but unusual
+  image_description: 'The plot displays a categorical scatter plot showing the relationship
+    between Marketing Investment (%) on the X-axis and Sales Growth (%) on the Y-axis.
+    Four distinct regions are represented: North (blue/steel blue), South (yellow/gold),
+    West (purple/violet), and East (green/teal). Each region forms a visually distinguishable
+    cluster of approximately 40 data points. The title "scatter-categorical · plotly
+    · pyplots.ai" is centered at the top. The legend is positioned on the right side
+    with a "Region" header and shows all four categories. Grid lines are subtle (light
+    gray), and the background is clean white. Markers have slight transparency (0.7)
+    with white borders, making overlapping points still distinguishable.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap anywhere; legend is well-separated from data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized (14px) with good alpha (0.7) for
+          ~160 total points; slight deduction as some points in dense areas could
+          benefit from slightly smaller markers
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, purple, and green palette is colorblind-safe; no
+          red-green-only distinctions
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned neatly
+          to the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Marketing Investment (%)" and "Sales
+          Growth (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha 0.1 which is good, but legend could use a
+          cleaner appearance (border is visible but thin)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct categorical scatter plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y continuous variables correctly assigned, category mapped
+          to color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors per category, legend,
+          transparency for overlapping points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to region names with clear "Region"
+          title
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "scatter-categorical · plotly · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows four distinct clusters with different center positions; demonstrates
+          category separation well. Minor deduction: clusters overlap somewhat which
+          is realistic but doesn''t show maximum distinction'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product performance across regions is a believable business scenario;
+          neutral topic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentage values are reasonable (20-120%), though some sales growth
+          values above 100% are unusual
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure; no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with custom hovertemplate for interactivity (visible
+          in HTML output), white marker borders. Could leverage more plotly-specific
+          features like animations or more advanced hover info
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/plotnine.yaml b/plots/scatter-categorical/metadata/plotnine.yaml
index 2db43d4a1c..5bdf09c3ca 100644
--- a/plots/scatter-categorical/metadata/plotnine.yaml
+++ b/plots/scatter-categorical/metadata/plotnine.yaml
@@ -23,3 +23,179 @@ review:
   - Could leverage more plotnine-specific features like facet_wrap for small multiples
     or stat_smooth for trend lines
   - Legend title font size could be slightly larger for consistency with axis titles
+  image_description: 'The plot shows a categorical scatter plot with 120 data points
+    (40 per group) representing plant growth rates vs. temperature for three species.
+    Colors used are: **blue (#306998)** for Species A, **yellow (#FFD43B)** for Species
+    B, and **cyan (#4ECDC4)** for Species C. The plot has a clean white background
+    with subtle gray grid lines. The title "scatter-categorical · plotnine · pyplots.ai"
+    is at the top center. The x-axis is labeled "Temperature (°C)" ranging from about
+    10-70, and the y-axis is labeled "Growth Rate (cm/week)" ranging from about 15-65.
+    A legend on the right side maps colors to "Plant Species" categories. The three
+    species show different patterns: Species A (blue) clusters in lower temperature/lower
+    growth range with positive correlation, Species B (yellow) occupies higher temperature/mid-high
+    growth values, and Species C (cyan) is scattered in the middle temperature range
+    with high variability. Points have appropriate alpha transparency for visibility.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, legend text
+          at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Point size=4 with alpha=0.7 works well for 120 points, though slightly
+          smaller markers could reduce overlap in dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and cyan are colorblind-safe (distinguishable for deuteranopia/protanopia)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills majority of space with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Temperature (°C)" and "Growth Rate
+          (cm/week)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well placed but could have title text slightly
+          larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with categorical coloring
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y continuous variables correctly assigned, color correctly maps
+          to category
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct colors per category, legend
+          for identification, alpha transparency for overlapping points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all three species with accurate color mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "scatter-categorical · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct groups with different correlation patterns (positive,
+          weaker, scattered), though all groups could show even more distinct clustering
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth rate vs temperature is a real, neutral scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature 10-70°C and growth rates 15-65 cm/week are plausible
+          for plant biology studies
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic plotnine features (ggplot + geom_point + scale_color_manual
+          + theme). Could use plotnine-specific features like faceting, stat layers,
+          or position adjustments for bonus points
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/pygal.yaml b/plots/scatter-categorical/metadata/pygal.yaml
index 1d6ab62f6c..e8ad978e09 100644
--- a/plots/scatter-categorical/metadata/pygal.yaml
+++ b/plots/scatter-categorical/metadata/pygal.yaml
@@ -26,3 +26,171 @@ review:
     legend_at_bottom=True or repositioning
   - Marker stroke_width=0 removes outlines which could help distinguish overlapping
     points
+  image_description: 'The plot displays a categorical scatter plot visualizing Iris-like
+    flower measurements. Three distinct species clusters are shown: **Setosa** (blue,
+    #306998) in the lower-left with small petal dimensions (length ~1-2cm, width ~0.1-0.5cm),
+    **Versicolor** (yellow, #FFD43B) in the middle region (length ~3-5cm, width ~0.9-1.9cm),
+    and **Virginica** (green, #2ca02c) in the upper-right (length ~4.5-7cm, width
+    ~1.4-3cm). The title "scatter-categorical · pygal · pyplots.ai" appears at the
+    top center. Axis labels include units: "Petal Length (cm)" on X-axis and "Petal
+    Width (cm)" on Y-axis. A legend with colored squares is positioned in the top-left
+    corner. Subtle grid lines aid readability. The markers are well-sized dots with
+    transparency (opacity 0.7) allowing overlap visibility.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend are clearly readable; tick labels
+          slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized with good alpha; dots_size=12 is appropriate for
+          120 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins, legend near plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Petal Length (cm)", "Petal Width (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend overlaps with plot area and first data point region
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter chart with categorical coloring
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to petal length/width
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical colors, legend, transparency all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all three species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-categorical · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters with different distributions; could
+          show more overlap between categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris flower measurements - classic, neutral scientific dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values realistic for iris petals; Setosa width slightly low but plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart and custom Style; could leverage more pygal-specific
+          features like tooltips
+  verdict: APPROVED
diff --git a/plots/scatter-categorical/metadata/seaborn.yaml b/plots/scatter-categorical/metadata/seaborn.yaml
index 078c622efb..7204f1a46c 100644
--- a/plots/scatter-categorical/metadata/seaborn.yaml
+++ b/plots/scatter-categorical/metadata/seaborn.yaml
@@ -27,3 +27,174 @@ review:
   - Does not utilize seaborn's style parameter to vary marker shapes by category as
     suggested in spec
   - Could use seaborn's built-in 'colorblind' palette instead of custom colors
+  image_description: 'The plot shows a categorical scatter plot with Iris-like flower
+    measurement data. Three species (Setosa, Versicolor, Virginica) are displayed
+    using distinct colors: blue (#306998) for Setosa, yellow (#FFD43B) for Versicolor,
+    and green (#6A9F58) for Virginica. The X-axis shows "Petal Length (cm)" ranging
+    from 1-8, and the Y-axis shows "Petal Width (cm)" ranging from 0-3. The title
+    "scatter-categorical · seaborn · pyplots.ai" is displayed at the top in bold.
+    A legend in the upper-left corner identifies the three species. The Setosa cluster
+    appears tightly grouped in the bottom-left (small petals), Versicolor spans the
+    middle region, and Virginica occupies the upper-right (larger petals). A subtle
+    dashed grid is present. Points have white edge borders and slight transparency.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size s=200 with alpha=0.7 is appropriate for 150 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Petal Length (cm)", "Petal Width
+          (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but dashed linestyle is slightly distracting
+          compared to solid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with categorical coloring
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y continuous variables correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors per category, legend present, alpha transparency
+          for overlap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to species names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-categorical · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters with different spreads, but spec suggests
+          varying marker shapes could add distinction
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic Iris flower dataset scenario - excellent scientific context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Petal measurements in realistic cm ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses seaborn's scatterplot with hue parameter correctly, but doesn't
+          leverage advanced features like style parameter for marker shapes or seaborn's
+          built-in colorblind palette
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/altair.yaml b/plots/scatter-color-mapped/metadata/altair.yaml
index 5891e2d4b2..a781f013f1 100644
--- a/plots/scatter-color-mapped/metadata/altair.yaml
+++ b/plots/scatter-color-mapped/metadata/altair.yaml
@@ -24,3 +24,175 @@ review:
     scale
   - Grid styling could benefit from slightly lower opacity (0.2 instead of 0.3) for
     subtler appearance
+  image_description: The plot displays a color-mapped scatter plot with 150 data points.
+    The X-axis shows "X Position (units)" (range 0-80) and the Y-axis shows "Y Position
+    (units)" (range 0-110). Each point is colored using the viridis colormap representing
+    an "Intensity" value (0-100), shown in the colorbar on the right. The color gradient
+    ranges from dark purple (low intensity ~0) to bright yellow (high intensity ~100).
+    The data exhibits a clear positive correlation where points in the upper-right
+    region have higher intensity values. Points have moderate size with subtle dark
+    stroke borders and good transparency (opacity ~0.75). The title "scatter-color-mapped
+    · altair · pyplots.ai" appears at the top center. The layout has balanced margins
+    with a well-positioned legend/colorbar.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 22pt, tick labels 18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized (180) with good opacity (0.75) for 150 points;
+          slight deduction as could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis which is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, minor extra whitespace
+          at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "X Position (units)", "Y Position
+          (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3) with dashed lines, colorbar well-placed;
+          however legend title says "Intensity" without units
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: scatter plot with color mapping'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, and color correctly assigned to continuous variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colormap, colorbar, appropriate point
+          size, transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents intensity values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-color-mapped · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlation between position and intensity well; data clustered
+          in three groups showing variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulates temperature/intensity readings at spatial locations - plausible
+          scenario but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Intensity 0-100 is sensible, coordinates in realistic range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's declarative encoding with Color scale, tooltips
+          for interactivity, and HTML export; could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/bokeh.yaml b/plots/scatter-color-mapped/metadata/bokeh.yaml
index 76e7f6bc25..a9e1763dcc 100644
--- a/plots/scatter-color-mapped/metadata/bokeh.yaml
+++ b/plots/scatter-color-mapped/metadata/bokeh.yaml
@@ -24,3 +24,176 @@ review:
   - Axis labels are generic (X Value, Y Value) rather than descriptive with units
   - Could add HoverTool for interactive tooltips in HTML output
   - Points could be slightly larger (size=35-40) given the data density
+  image_description: The plot displays a color-mapped scatter plot with approximately
+    150 data points on a light gray background (#fafafa). The X-axis ("X Value") ranges
+    from approximately 25 to 75, and the Y-axis ("Y Value") ranges from approximately
+    25 to 85. Points are colored using the Viridis colormap (purple/blue for low values
+    through green to yellow for high values), representing an "Intensity" variable
+    shown in the colorbar on the right side. The colorbar ranges from 0 to approximately
+    35. The title "scatter-color-mapped · bokeh · pyplots.ai" appears in the top-left.
+    Points show a positive correlation between X and Y, with intensity values increasing
+    for points farther from the center (around 50,50). The grid uses subtle dashed
+    lines with low alpha. Point sizes are uniform and appropriately sized for the
+    data density.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable but could be slightly
+          larger for the 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=30) with good alpha (0.8), though slightly
+          on the smaller side for 150 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, colorbar well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels ("X Value", "Y Value") are generic and lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.3 and dashed style, colorbar serves as
+          legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with color mapping
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, and color (intensity) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter points, colormap, colorbar with
+          label'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately labeled "Intensity"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-color-mapped · bokeh · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows color variation across the range, demonstrates correlation
+          pattern, though could show more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Plausible scenario (distance-based intensity), but generic "X Value"/"Y
+          Value" labels reduce realism
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible and within reasonable ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, LinearColorMapper, linear_cmap transform,
+          and ColorBar which are Bokeh-specific, but doesn't leverage Bokeh's interactive
+          features like hover tooltips
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/highcharts.yaml b/plots/scatter-color-mapped/metadata/highcharts.yaml
index bc32984d13..531d4a55e1 100644
--- a/plots/scatter-color-mapped/metadata/highcharts.yaml
+++ b/plots/scatter-color-mapped/metadata/highcharts.yaml
@@ -22,3 +22,172 @@ review:
   - Code contains a helper function (interpolate_color) instead of pure sequential
     KISS structure
   - Axis labels could include units (e.g., X Position (km) for the geographic scenario)
+  image_description: The plot displays a color-mapped scatter plot with approximately
+    150 data points distributed across the canvas. Points are colored using the viridis
+    colormap, transitioning from dark purple (~0°C) in the bottom-left to bright yellow
+    (~68°C) in the upper-right region. The title "scatter-color-mapped · highcharts
+    · pyplots.ai" appears at the top. The X-axis is labeled "X Position" (range 0-98)
+    and Y-axis "Y Position" (range 0-84). A colorbar on the right shows "Temperature
+    (°C)" with tick marks at 0, 14, 27, 41, 55, and 68. The scatter points have subtle
+    dark outlines and are appropriately sized for the data density. Grid lines are
+    visible but subtle.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and colorbar text all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized for 150 points with good alpha, slight overlap
+          in dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout, colorbar slightly compressed vertically
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Position", "Y Position") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate, legend disabled (appropriate for
+          colorbar usage)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, and color correctly mapped to three variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter, color encoding, colorbar with
+          labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled with "Temperature (°C)"
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but missing decorative separator style
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full range of color values with clear gradient pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Temperature across geographic region is plausible, though generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range 0-68°C is reasonable for the scenario
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains helper function (interpolate_color) which deviates from
+          pure KISS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses Highcharts renderer API for custom colorbar, tooltip formatting,
+          and chart events
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/letsplot.yaml b/plots/scatter-color-mapped/metadata/letsplot.yaml
index fdbd74c947..b8a05d39a2 100644
--- a/plots/scatter-color-mapped/metadata/letsplot.yaml
+++ b/plots/scatter-color-mapped/metadata/letsplot.yaml
@@ -25,3 +25,177 @@ review:
     improve visibility
   - Grid styling uses dashed lines which appear somewhat busy; solid subtle lines
     might be cleaner
+  image_description: 'The plot displays a color-mapped scatter plot with approximately
+    150 data points spread across a 2D coordinate system. The X-axis ranges from 5
+    to 90 meters, and the Y-axis from 10 to 110 meters. Points are colored using the
+    viridis colormap (purple to yellow), representing temperature values from approximately
+    17°C to 42°C. The colorbar on the right clearly shows "Temperature (°C)" with
+    a gradient scale. The title reads "scatter-color-mapped · lets-plot · pyplots.ai".
+    The plot uses a minimal theme with light gray dashed grid lines. Points show three
+    visible clusters: one in the lower-left (cooler purple/blue tones ~15-25°C), one
+    in the center-upper region (mid-range teal/green ~30-35°C), and scattered points
+    on the right (warmer yellow/green ~35-42°C). The temperature clearly correlates
+    with spatial position.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend text are all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with appropriate alpha (0.8), though slightly
+          small for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "X Coordinate (m)",
+          "Y Coordinate (m)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid dashes visible but legend label uses wrong case for "lets-plot"
+          in title (minor)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with color mapping
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y coordinates correctly mapped, color encodes third variable (temperature)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present with label, perceptually uniform colormap (viridis),
+          moderate point size, transparency for overlap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Temperature (°C)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows color mapping across full range, clustering visible, correlation
+          between position and temperature evident
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature readings across spatial coordinates is a real-world scenario
+          (environmental monitoring, spatial analysis)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 17-42°C is realistic for ambient/surface measurements;
+          coordinates in meters are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses interactive tooltips (layer_tooltips), theme customization,
+          scale_color_viridis, but could leverage more lets-plot features
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/matplotlib.yaml b/plots/scatter-color-mapped/metadata/matplotlib.yaml
index 73ea4b39cd..0b64de6f89 100644
--- a/plots/scatter-color-mapped/metadata/matplotlib.yaml
+++ b/plots/scatter-color-mapped/metadata/matplotlib.yaml
@@ -25,3 +25,178 @@ review:
     control
   - Grid could be even more subtle (alpha=0.2) to reduce visual noise
   - Colorbar tick label size (16pt) is slightly smaller relative to other labels (20pt)
+  image_description: The plot displays a scatter plot with 150 data points distributed
+    across a coordinate system representing longitude (-10° to 10°) on the x-axis
+    and latitude (-5° to 5°) on the y-axis. Each point is colored according to a temperature
+    value using the viridis colormap, ranging from approximately 2°C (dark purple)
+    to 30°C (bright yellow). The color gradient clearly shows warmer temperatures
+    (yellow-green) concentrated toward the top-right region and cooler temperatures
+    (purple-blue) toward the bottom-left, demonstrating the intended correlation.
+    Points have white edge outlines for better visibility. A vertical colorbar on
+    the right shows "Temperature (°C)" with appropriate tick marks. The title correctly
+    follows the format "scatter-color-mapped · matplotlib · pyplots.ai". Grid lines
+    are subtle with alpha transparency.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, colorbar
+          label at 20pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized at s=150 with alpha=0.8, appropriate for 150 points
+          (guidelines suggest s=100-200 for 100-300 points, so this is good but could
+          be slightly larger)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis colormap, which is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot is well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Longitude (°)" and "Latitude (°)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3, but colorbar tick label font could be
+          slightly larger for better balance
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (longitude), Y (latitude), color (temperature) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colormap present, colorbar with clear label and units, moderate point
+          size with transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents temperature scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "scatter-color-mapped · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows color gradient across full range, but correlation pattern could
+          be more pronounced in some areas
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Geographic temperature measurements are a real-world scenario that
+          makes sense
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range of ~2-30°C is realistic, longitude/latitude ranges
+          are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ax.scatter with colormap correctly, but could leverage more
+          matplotlib-specific features like norm for color scaling or custom colorbar
+          formatting
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/plotly.yaml b/plots/scatter-color-mapped/metadata/plotly.yaml
index fa8a2e995c..4a70788705 100644
--- a/plots/scatter-color-mapped/metadata/plotly.yaml
+++ b/plots/scatter-color-mapped/metadata/plotly.yaml
@@ -25,3 +25,175 @@ review:
     colorbar ticks or range sliders
   - Grid alpha at 0.3 is acceptable but could be slightly more subtle (0.2) for a
     cleaner look
+  image_description: The plot displays a scatter plot with 150 data points showing
+    temperature readings across US geographic coordinates. The x-axis shows Longitude
+    (°W) ranging from -120 to -70, and the y-axis shows Latitude (°N) ranging from
+    approximately 25 to 50. Each point is colored using the Viridis colormap, mapping
+    temperature values from approximately 15°F (dark purple) to 50°F (yellow). The
+    colorbar on the right clearly shows "Temperature (°F)" with a well-defined gradient.
+    The title "scatter-color-mapped · plotly · pyplots.ai" is centered at the top.
+    Points have white borders for visibility, and the expected latitude-temperature
+    relationship is visible (cooler/purple in the north, warmer/yellow-green in the
+    south).
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt, colorbar
+          title at 20pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size 18 with 0.8 opacity work well for 150 points, white
+          borders enhance visibility; slightly larger could help
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins (l=100, r=120, t=100, b=100)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Longitude (°W)", "Latitude (°N)", "Temperature
+          (°F)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at 0.3 alpha is subtle, colorbar well placed; grid could be
+          slightly more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with color mapping
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=longitude, Y=latitude, Color=temperature correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar present, transparency for overlap, moderate point sizes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 150 points visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled with "Temperature (°F)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-color-mapped · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows color gradient well across full range; could show more extreme
+          temperature variations
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature distribution across US coordinates is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range (15-50°F) is realistic for a continental region;
+          some southern points seem cool
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses hovertemplate for rich tooltips, but could leverage more Plotly
+          features like animations or custom hover modes
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/plotnine.yaml b/plots/scatter-color-mapped/metadata/plotnine.yaml
index cc836a0782..018ab439f2 100644
--- a/plots/scatter-color-mapped/metadata/plotnine.yaml
+++ b/plots/scatter-color-mapped/metadata/plotnine.yaml
@@ -27,3 +27,175 @@ review:
     data density
   - Temperature range could show more variation to better demonstrate the full colormap
     spectrum
+  image_description: 'The plot displays a color-mapped scatter plot with 150 data
+    points distributed across a 100m × 100m sensor grid. Points are colored using
+    the viridis colormap to represent temperature values ranging from approximately
+    20°C to 45°C. A clear spatial temperature gradient is visible: cooler temperatures
+    (purple/dark blue, ~20-25°C) appear in the lower-left region while warmer temperatures
+    (yellow/green, ~40-45°C) appear in the upper-right region. The title "scatter-color-mapped
+    · plotnine · pyplots.ai" is displayed at the top. Axis labels show "X Position
+    (m)" and "Y Position (m)" with units. A vertical colorbar legend on the right
+    side is labeled "Temperature (°C)" with tick marks at 25, 30, 35, 40, and 45.
+    The plot uses a clean minimal theme with subtle gridlines and good use of transparency
+    (alpha=0.8) for the markers.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, legend text
+          at 14pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size=4 with alpha=0.8 are well-sized for 150 points, though
+          could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but some empty space on right side due to colorbar
+          placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "X Position (m)", "Y Position (m)",
+          "Temperature (°C)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid from theme_minimal(), colorbar well-positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with color mapping
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y positions correctly mapped, temperature as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colormap, colorbar, transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full 0-100 range for both dimensions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled with variable name and units
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "scatter-color-mapped · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows spatial temperature gradient well, demonstrates color mapping
+          effectively; could show more extreme variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor grid is a realistic scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 20-45°C is plausible but slightly warm for typical
+          conditions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (aes, geom_point, scale_color_cmap, labs, theme_minimal,
+          theme) correctly but no advanced features like faceting
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/pygal.yaml b/plots/scatter-color-mapped/metadata/pygal.yaml
index 533b373e01..29cd20b0d3 100644
--- a/plots/scatter-color-mapped/metadata/pygal.yaml
+++ b/plots/scatter-color-mapped/metadata/pygal.yaml
@@ -24,3 +24,176 @@ review:
     inconsistency'
   - Layout could utilize more canvas space; legend at bottom takes significant real
     estate
+  image_description: 'The plot displays a scatter visualization titled "scatter-color-mapped
+    · pygal · pyplots.ai" showing 100 data points representing temperature readings
+    across a 100×100 meter sensor grid. The X-axis is labeled "Grid X Position (meters)"
+    and Y-axis "Grid Y Position (meters)". Points are colored using a viridis-inspired
+    palette spanning 8 temperature bins from dark purple (13-16°C) to yellow (32-35°C).
+    The visualization clearly demonstrates a radial temperature pattern with warmer
+    temperatures (yellow/green points: 26-35°C) concentrated toward the center of
+    the grid and cooler temperatures (purple/blue points: 13-24°C) at the periphery.
+    The legend at the bottom displays all 8 temperature ranges. Grid lines are subtle
+    with dashed styling. The plot fills the canvas well with balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and legend all readable at full size; font sizes
+          well-calibrated for 4800×2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend properly placed at bottom
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots size 16 appropriate for 100 points; alpha 0.85 provides good
+          visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis-inspired palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall but legend takes significant space; plot area could
+          be slightly larger
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Grid X Position (meters)", "Grid
+          Y Position (meters)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle with dashed lines; legend clear and well-organized
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot type using pygal.XY()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped to grid positions
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Color mapping implemented via discrete bins; legend serves as colorbar
+          substitute, but continuous colorbar would be ideal (pygal limitation)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data within 0-100 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend labels correct but first bin has extra "Temperature:" prefix
+          breaking consistency
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-color-mapped · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows temperature gradient pattern effectively with radial distribution;
+          good color range coverage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature sensor grid is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature range 13-35°C realistic for outdoor measurements; grid
+          size reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style imported; all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart, custom Style, renders to both PNG and SVG/HTML; creative
+          use of discrete series for color mapping
+  verdict: APPROVED
diff --git a/plots/scatter-color-mapped/metadata/seaborn.yaml b/plots/scatter-color-mapped/metadata/seaborn.yaml
index 004e53233a..c33aec1383 100644
--- a/plots/scatter-color-mapped/metadata/seaborn.yaml
+++ b/plots/scatter-color-mapped/metadata/seaborn.yaml
@@ -26,3 +26,177 @@ review:
     confuse viewers
   - Could leverage seaborn native hue legend capabilities instead of manually creating
     ScalarMappable
+  image_description: The plot displays a color-mapped scatter plot showing 150 data
+    points representing temperature readings across a geographic grid. The X-axis
+    shows "Longitude (°E)" ranging from -10 to 10, and the Y-axis shows "Latitude
+    (°N)" ranging from -8 to 8. Each point is colored using the viridis colormap,
+    with a colorbar on the right indicating "Temperature (°C)" ranging from approximately
+    12°C (dark purple) to 37°C (bright yellow). Points near the center of the plot
+    are warmer (yellow/green) while those at the edges are cooler (blue/purple), clearly
+    demonstrating the radial temperature pattern. The title reads "scatter-color-mapped
+    · seaborn · pyplots.ai". A subtle dashed grid is visible in the background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized with good alpha (0.8), though size variation
+          adds slight visual complexity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout, plot fills canvas well, slight asymmetry due to colorbar
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Longitude (°E)", "Latitude (°N)",
+          "Temperature (°C)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid has alpha=0.3 which is good, but no legend for size encoding
+          (size also maps to temperature)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with color mapping
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned as geographic coordinates, color mapped to
+          temperature
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar with clear label, moderate point size, transparency
+          for overlap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 150 points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately labeled with variable and units
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-color-mapped · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows full range of color values, demonstrates pattern (center=hot,
+          edge=cold), good variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature distribution across geographic coordinates is a real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperatures 12-37°C are realistic, though longitude/latitude range
+          is small for typical geographic data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot which is standard, but manually creates colorbar
+          rather than leveraging seaborn's built-in hue handling with legend. The
+          size parameter adds complexity but seaborn could handle color mapping more
+          elegantly.
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/altair.yaml b/plots/scatter-marginal/metadata/altair.yaml
index 0c67b48849..82cc765020 100644
--- a/plots/scatter-marginal/metadata/altair.yaml
+++ b/plots/scatter-marginal/metadata/altair.yaml
@@ -28,3 +28,177 @@ review:
     opportunity for linked interactivity)
   - Right marginal histogram shows Y-axis tick labels which creates slight visual
     redundancy
+  image_description: 'The plot displays a scatter chart with marginal distribution
+    histograms in the classic layout: main scatter plot in the lower-left, top histogram
+    for X distribution, right histogram for Y distribution. The scatter shows ~150
+    data points in Python blue (#306998) with 0.65 opacity, demonstrating a clear
+    positive linear correlation. X-axis labeled "X Value (units)" ranges roughly 0-90,
+    Y-axis labeled "Y Value (units)" ranges roughly 0-110. Both marginal histograms
+    use matching blue bars with 0.5 opacity, showing approximately normal distributions.
+    The title "scatter-marginal · altair · pyplots.ai" appears centered at the top
+    in appropriately large font. The axes of marginal plots align well with the main
+    scatter plot axes. Layout is clean with no overlapping elements.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (~28pt), axis labels readable (~20pt), tick
+          labels properly sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized (size=120) with good opacity (0.65) for 150 points;
+          slightly on the larger side but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with marginal plots aligned; slight asymmetry in spacing
+          between components
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "X Value (units)", "Y Value (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid on scatter plot, no legend (though legend not strictly needed
+          here)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with marginal histograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned, marginal histograms show correct distributions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has main scatter, top marginal (histogram), right marginal (histogram),
+          proper alignment
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (single series)
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "scatter-marginal · altair · pyplots.ai" but
+          could use larger font
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive correlation well; marginal distributions visible;
+          could show more varied distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic X/Y values with units; plausible but not a specific real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are reasonable for generic numeric data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair 5/6 API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses vconcat/hconcat composition, interactive brush selection, tooltips,
+          and declarative encoding - good use of Altair's compositional grammar, but
+          brush selection doesn't update marginal histograms (would require transform_filter)
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/bokeh.yaml b/plots/scatter-marginal/metadata/bokeh.yaml
index 7208319554..7583ef23c6 100644
--- a/plots/scatter-marginal/metadata/bokeh.yaml
+++ b/plots/scatter-marginal/metadata/bokeh.yaml
@@ -24,3 +24,177 @@ review:
   - Scatter point size (18) could be slightly larger for better visibility on this
     canvas
   - Empty corner placeholder could be utilized for annotation or removed more elegantly
+  image_description: The plot displays a scatter plot with marginal distributions
+    using Bokeh. The main scatter plot (lower-left) shows approximately 200 blue data
+    points (#306998 Python blue) with moderate transparency (alpha ~0.65) displaying
+    a positive correlation. The top marginal shows a histogram of the X distribution
+    in yellow bars (#FFD43B Python yellow) with blue outlines, roughly bell-shaped
+    and centered around 50. The right marginal shows a horizontal histogram of the
+    Y distribution, also in yellow with blue outlines. The title "scatter-marginal
+    · bokeh · pyplots.ai" appears at the top-left of the main scatter plot. Axes are
+    labeled "X Value" and "Y Value" with "Count" labels on the marginals. Grid lines
+    are subtle with dashed styling. There's an empty corner placeholder in the top-right.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, font sizes are appropriate for
+          the canvas size (28pt title, 22pt labels, 18pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter points (size=18) are well-sized for 200 points with good
+          alpha=0.65, slightly on the smaller side for this canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Python blue and yellow are colorblind-safe and provide good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with marginals properly aligned, though there's some
+          unused space in the corner area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Value", "Y Value") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3) with dashed styling, no legend needed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct composite visualization: scatter with marginal histograms'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned and aligned between scatter and marginals
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has scatter + marginal histograms; spec mentions KDE as option but
+          histograms are acceptable
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes properly auto-scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present (not strictly needed but marginals could benefit
+          from labels)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-marginal · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlation clearly, distributions visible; could show more
+          varied distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Bivariate normal with correlation is a realistic, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are sensible (X: ~10-90, Y: ~10-95), 200 points is good for
+          this visualization'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, figure range linking for axis alignment, quad
+          glyphs for histograms. Could leverage more Bokeh-specific features like
+          hover tools or linked selections.
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/highcharts.yaml b/plots/scatter-marginal/metadata/highcharts.yaml
index a767481b05..4cb8ccbd21 100644
--- a/plots/scatter-marginal/metadata/highcharts.yaml
+++ b/plots/scatter-marginal/metadata/highcharts.yaml
@@ -22,3 +22,179 @@ review:
   - Top-right corner is empty white space that could be utilized better
   - Could use Highcharts chart linking features for synchronized crosshairs/tooltips
   - Library version in header shows unknown instead of actual version
+  image_description: The plot displays a scatter plot with marginal histogram distributions.
+    The main scatter plot occupies the lower-left area showing temperature (°C) on
+    the x-axis (ranging ~10-38°C) vs. relative humidity (%) on the y-axis (ranging
+    ~20-78%). A clear negative correlation is visible between temperature and humidity.
+    The top marginal shows a histogram of temperature distribution with vertical blue
+    bars, peaking around 23-27°C. The right marginal shows a horizontal histogram
+    of humidity distribution, with most values concentrated in the 42-58% range. The
+    title "scatter-marginal · highcharts · pyplots.ai" appears at the top. All elements
+    use a consistent blue color (#306998) with transparency. Axis labels include units.
+    Grid lines are subtle dashed lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title, axis labels, and tick marks are appropriately
+          sized for the resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; histograms, scatter points, and labels
+          are all clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Scatter markers are well-sized (radius 12) with appropriate transparency
+          (0.45 fill) for 150 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is colorblind-safe; no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout with aligned axes, but there's some wasted space in the
+          top-right corner; plot areas well proportioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Temperature (°C)" and "Relative Humidity
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid lines with appropriate opacity; legend disabled
+          (appropriate for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with marginal histogram distributions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Temperature, Y=Humidity correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter plot, top marginal histogram,
+          right marginal histogram, aligned axes'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 150 data points visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series; disabled appropriately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-marginal · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlation pattern well; marginal distributions show spread;
+          could benefit from more distinct clustering or outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Realistic sensor data scenario with temperature/humidity showing
+          expected negative correlation
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature 10-38°C and humidity 20-78% are realistic environmental
+          values
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible data
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports are used, but the multiple series imports are somewhat redundant
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png via screenshot (correct)
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses multiple synchronized charts for composite layout; could leverage
+          more Highcharts-specific features like synchronized crosshairs or chart
+          linking
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/letsplot.yaml b/plots/scatter-marginal/metadata/letsplot.yaml
index d8dca45bcf..bddcf80605 100644
--- a/plots/scatter-marginal/metadata/letsplot.yaml
+++ b/plots/scatter-marginal/metadata/letsplot.yaml
@@ -27,3 +27,183 @@ review:
   - Axis labels are generic (X Value, Y Value) without real-world context or units
   - Right marginal histogram extends slightly past main plot area creating minor misalignment
     at bottom
+  image_description: The plot displays a scatter plot with marginal distributions
+    arranged in a composite layout. The main scatter plot occupies the lower-left
+    area showing ~200 blue points (#306998) with positive correlation between X Value
+    (range 4-16) and Y Value (range 3-17). The top marginal shows a horizontal histogram
+    with blue/gray bars and a red KDE curve overlaid, displaying the X distribution.
+    The right marginal shows a vertical (rotated) histogram with blue/gray bars and
+    red KDE curve for the Y distribution. The title "scatter-marginal · letsplot ·
+    pyplots.ai" appears between the top histogram and main plot. Axis labels are "X
+    Value" and "Y Value". The layout uses ggbunch to combine three separate plots.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Title uses
+          size=24, axis titles size=20, tick text size=16 - all appropriate for the
+          4800×2700 output.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. The layout cleanly separates all components.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=4) with good alpha (0.65) for 200 data
+          points. The scatter pattern and correlation are clearly visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue (#306998) for points and red (#DC2626)
+          for KDE curves - good contrast and distinction.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of ggbunch for layout. The title placement between top
+          histogram and main plot is unusual but functional. Minor: margins could
+          be slightly tighter.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Value" and "Y Value" but without units (acceptable
+          for generic example data).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Minimal theme with subtle grid. No legend needed for single-series
+          plot. Grid visible but unobtrusive.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct composite plot: scatter with marginal distributions (histograms
+          + KDE).'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly mapped to axes, marginals correctly aligned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has all required features: scatter plot, marginal histograms, KDE
+          overlays, proper axis alignment via shared limits.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes properly scaled with 0.5 padding.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A - no legend needed, single series.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "scatter-marginal · letsplot · pyplots.ai" but
+          placed in unusual position (between the top histogram and the main plot).
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive correlation clearly. Both marginal distributions show
+          roughly normal shape. Could show more interesting distribution features
+          (bimodality, outliers).
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic but plausible bivariate data with correlation coefficient
+          ~0.8. Labels are generic "X Value"/"Y Value".
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible numeric ranges, appropriate data count (200 points) for
+          this visualization type.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plots → combine → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot, export).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactive version).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggbunch for combining plots (lets-plot specific), coord_flip
+          for right histogram, proper theme customization. Good use of ggplot2-style
+          grammar. Could use more advanced features like flavor themes or tooltips.
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/matplotlib.yaml b/plots/scatter-marginal/metadata/matplotlib.yaml
index 3a31fea13a..d06d572347 100644
--- a/plots/scatter-marginal/metadata/matplotlib.yaml
+++ b/plots/scatter-marginal/metadata/matplotlib.yaml
@@ -26,3 +26,176 @@ review:
   - Could overlay KDE curves on histograms as spec suggests this is an option
   - Marker size (s=120) slightly larger than optimal for 200 points per guidelines
     (should be ~50-100)
+  image_description: The plot displays a scatter plot with marginal distributions
+    using matplotlib GridSpec layout. The central scatter plot shows 200 blue data
+    points (#306998) with white edges and alpha=0.65, revealing a clear positive linear
+    correlation (r≈0.7). Two clusters are visible in the data corresponding to the
+    bimodal X distribution. The top marginal histogram shows the X distribution with
+    two distinct peaks (around 30 and 60), while the right marginal histogram displays
+    the Y distribution (roughly unimodal, centered ~45). All elements use consistent
+    blue coloring. The title "scatter-marginal · matplotlib · pyplots.ai" appears
+    in the top-right corner. Axis labels show "X Value", "Y Value" for the main plot
+    and "Count" for marginals. A subtle dashed grid (alpha=0.3) aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 14-16pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at s=120 with alpha=0.65 are well-sized for 200 points, though
+          slightly large for optimal density visualization
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, fully colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: GridSpec creates well-proportioned layout with main plot and aligned
+          marginals
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels ("X Value", "Y Value") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3); no legend needed for single-series
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with marginal distributions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Main scatter + top histogram + right histogram, all aligned as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single-series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-marginal · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows correlation and bimodal X distribution, but the two-cluster
+          structure could be more pronounced in the visualization
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Plausible correlated bivariate data, but generic "X Value"/"Y Value"
+          labels reduce realism
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in sensible ranges (10-90 for X, 0-80 for Y)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (matplotlib.pyplot, numpy, GridSpec)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" (correct) but file path is relative, not showing
+          full plots/{spec-id}/ structure
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of GridSpec for complex layout, sharex/sharey for axis alignment,
+          but could leverage more advanced matplotlib features like KDE overlay or
+          Axes insets
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/plotly.yaml b/plots/scatter-marginal/metadata/plotly.yaml
index 530376e6c6..15c68ea3d3 100644
--- a/plots/scatter-marginal/metadata/plotly.yaml
+++ b/plots/scatter-marginal/metadata/plotly.yaml
@@ -24,3 +24,171 @@ review:
   weaknesses:
   - Axis labels are generic (X Value, Y Value) without units or more descriptive context
   - Could use a more realistic data scenario instead of abstract random data
+  image_description: 'The plot shows a scatter plot with marginal histogram distributions.
+    The main scatter plot occupies the lower-left portion with blue circular markers
+    (color #306998) showing a positive correlation between X Value (ranging ~20-80)
+    and Y Value (ranging ~20-85). The top marginal histogram shows the X distribution
+    with approximately normal shape centered around 50. The right marginal histogram
+    shows the Y distribution, also roughly normal. The title "scatter-marginal · plotly
+    · pyplots.ai" is centered at the top in dark gray text. All text is clearly readable,
+    axis labels show "X Value" and "Y Value", tick labels are visible on all axes,
+    and the overall layout is clean with a white background and subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at full
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized with good opacity (0.65), though slightly
+          large for 200 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned layout with marginals aligned to main plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units provided
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.3), no legend needed for single series
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter with marginal distributions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned with correlation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Marginal histograms on top and right, aligned with main scatter
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: scatter-marginal · plotly · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlation and distributions, could show more varied distribution
+          shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic bivariate data is plausible but not a specific real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for generic numeric data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.express imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly.express API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Leverages plotly.express's built-in marginal_x/marginal_y feature,
+          interactive HTML output
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/plotnine.yaml b/plots/scatter-marginal/metadata/plotnine.yaml
index 4de9c84399..8cc5aa89a3 100644
--- a/plots/scatter-marginal/metadata/plotnine.yaml
+++ b/plots/scatter-marginal/metadata/plotnine.yaml
@@ -22,3 +22,177 @@ review:
   weaknesses:
   - Grid styling inconsistent between main scatter plot and marginal histograms
   - Top-right spacer creates slight visual imbalance in the layout
+  image_description: The plot displays a scatter plot with marginal distributions
+    in a 2x2 grid layout. The main scatter plot (bottom-left) shows blue data points
+    representing the correlation between Study Hours per Week (x-axis, 0-50) and Exam
+    Score (%, y-axis, 30-100). Above it, a blue histogram shows the distribution of
+    study hours, centered around 25 hours with a roughly normal distribution. On the
+    right side, a yellow/gold histogram shows the exam score distribution in a flipped
+    orientation (horizontal bars), displaying a roughly normal distribution. The title
+    "scatter-marginal · plotnine · pyplots.ai" appears at the top. The top-right corner
+    is an empty spacer. The data shows a clear positive correlation between study
+    hours and exam scores.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized with appropriate alpha=0.6, good visibility for
+          200 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and provide
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but top-right spacer creates some visual imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Study Hours per Week", "Exam Score
+          (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines visible on scatter plot but marginal histograms have no
+          grid; slight inconsistency
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with marginal distributions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped to study hours and exam score
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter plot, top histogram, right histogram,
+          proper alignment'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-marginal · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlation pattern, different distributions for each variable,
+          outliers visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam score is a real, comprehensible educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Study hours 5-45, exam scores 30-100 are realistic ranges
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses proper structure but has some complexity with theme definitions
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports used, but element_text imported with other unused ones
+          initially listed
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Good use of plotnine's composition API (plot_spacer, | and / operators),
+          grammar of graphics approach. Could have used faceting or additional ggplot2-like
+          features.
+  verdict: APPROVED
diff --git a/plots/scatter-marginal/metadata/seaborn.yaml b/plots/scatter-marginal/metadata/seaborn.yaml
index ca69b0bb0c..eab934cfdd 100644
--- a/plots/scatter-marginal/metadata/seaborn.yaml
+++ b/plots/scatter-marginal/metadata/seaborn.yaml
@@ -26,3 +26,177 @@ review:
     units
   - Data scenario is generic rather than representing a real-world use case
   - Grid uses dashed style which is slightly more prominent than subtle solid gridlines
+  image_description: The plot displays a scatter plot with marginal distributions
+    using seaborn's jointplot. The central scatter plot shows 200 blue points (#306998)
+    with moderate transparency (alpha 0.65) and white edges. There is a clear positive
+    correlation visible in the data. The top marginal shows a histogram with ~25 bins
+    and an overlaid KDE curve for the X variable distribution. The right marginal
+    shows a horizontal histogram with KDE for the Y variable distribution. Both marginal
+    histograms use the same blue color with alpha 0.7. The title "scatter-marginal
+    · seaborn · pyplots.ai" appears at the top. Axis labels show "X Value" and "Y
+    Value" with a subtle dashed grid. The overall layout is square (~1:1 aspect ratio)
+    which is appropriate for jointplots.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 26pt, axis labels at 22pt, tick labels at 14-16pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (s=150) for 200 points with good alpha
+          (0.65). White edge helps distinguish overlapping points. Slightly larger
+          than optimal per guidelines (100-200 range for 100-300 points).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of square aspect ratio for jointplot, marginals well-proportioned.
+          Minor: slight imbalance between marginal sizes due to ratio=5.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "X Value" and "Y Value" but no units provided
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but dashed style slightly more prominent
+          than solid alpha grid. No legend needed for single-color scatter.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with marginal distributions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has central scatter, top marginal histogram, right marginal histogram,
+          both with KDE overlays as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes properly scaled
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, no legend needed for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-marginal · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive correlation and univariate distributions well. Could
+          show more interesting distribution features (e.g., slight skewness or bimodality).
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic "X Value" / "Y Value" labels - plausible but not a specific
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values in reasonable numeric ranges (X: 10-90, Y: 10-100)'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and seaborn imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's jointplot which is a distinctive feature. However,
+          could have used additional seaborn features like hue parameter or different
+          marginal_kind options (e.g., "kde" or "hist+kde" using marginal_ticks).
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/bokeh.yaml b/plots/scatter-matrix/metadata/bokeh.yaml
index 11ae196874..b4514df632 100644
--- a/plots/scatter-matrix/metadata/bokeh.yaml
+++ b/plots/scatter-matrix/metadata/bokeh.yaml
@@ -25,3 +25,174 @@ review:
     outside the grid
   - Output includes both PNG and HTML; while HTML is appropriate for Bokeh, it is
     extra output
+  image_description: 'The scatter matrix displays a 4×4 grid of plots comparing four
+    Iris dataset variables: Sepal Length (cm), Sepal Width (cm), Petal Length (cm),
+    and Petal Width (cm). The diagonal cells show overlapping stacked histograms for
+    three species distributions, while off-diagonal cells show scatter plots for each
+    variable pair. Colors used are Python Blue (#306998) for Setosa, Python Yellow
+    (#FFD43B) for Versicolor, and teal (#2AA198) for Virginica. The title "scatter-matrix
+    · bokeh · pyplots.ai" appears at the top-left corner. A legend in the top-left
+    histogram cell identifies the three species. Variable names with units appear
+    along the left (y-axis) and bottom (x-axis) edges. Grid lines are subtle and dashed.
+    All plots have consistent dark gray outlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title at 28pt, axis labels at 20pt, tick
+          labels at 16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the grid
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: 'Scatter markers sized well (14px), histograms visible. Minor: some
+          scatter cells have slightly dense overlap'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Teal palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: 4x4 grid fills canvas well, but square format means some wasted space
+          around edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sepal Length (cm)", "Petal Width
+          (cm)", etc.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3, dashed), legend well-placed in top-left cell
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter matrix / pairplot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly assigned to rows/columns
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal histograms, off-diagonal scatters, color encoding for groups
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows only in top-left cell; labels correct but could be more
+          prominent
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "scatter-matrix · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlations between petal/sepal measurements, species clustering
+          clearly visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is classic ML/statistics example, measurements are plausible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in realistic botanical measurement ranges (cm)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → grid construction → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also outputs plot.html (correct but minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, factor_cmap, gridplot properly. Could leverage
+          more Bokeh-specific interactivity features
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/highcharts.yaml b/plots/scatter-matrix/metadata/highcharts.yaml
index 6a6397c186..59f95b89e4 100644
--- a/plots/scatter-matrix/metadata/highcharts.yaml
+++ b/plots/scatter-matrix/metadata/highcharts.yaml
@@ -23,3 +23,177 @@ review:
   - 'Grid lines could be slightly more subtle (alpha 0.2-0.3 instead of #e0e0e0)'
   - Bottom-row x-axis tick labels are partially cut off/difficult to read
   - Uses raw Highcharts JS generation instead of highcharts-core Python library
+  image_description: 'The scatter matrix displays a 4×4 grid showing pairwise relationships
+    between four Iris dataset variables: Sepal Length (cm), Sepal Width (cm), Petal
+    Length (cm), and Petal Width (cm). The diagonal cells contain stacked histograms
+    showing distributions per species. The off-diagonal cells show scatter plots with
+    data points colored by species. Three species are shown: Setosa (blue), Versicolor
+    (yellow/gold), and Virginica (purple). The title "scatter-matrix · highcharts
+    · pyplots.ai" appears at the top. Variable names appear along the top row and
+    left column as axis labels. A legend at the bottom identifies the three species
+    with colored circular markers. The colorblind-safe palette uses distinct blue,
+    yellow, and purple colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable, font sizes scaled
+          for high-res canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels well positioned along edges
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter markers visible with alpha 0.75, appropriate sizing for 50
+          points per species
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette (blue/yellow/purple), no red-green
+          issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of 3600×3600 canvas, matrix fills most of the space with
+          title and legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sepal Length (cm)", "Petal Width (cm)",
+          etc.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid subtle (alpha via #e0e0e0), legend well placed at bottom'
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot matrix with histograms on diagonal
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Variables correctly mapped to axes in symmetric grid
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Histograms on diagonal, scatter plots off-diagonal, color by category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Species names correct but legend could be larger/more prominent
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-matrix · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear species separation in petal measurements, overlap in
+          sepal measurements, different distribution shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic Iris dataset scenario, perfect for demonstrating scatter
+          matrix
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values match real Iris ranges, though synthetic data slightly varies
+          from actual dataset
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot generation → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Highcharts API via raw JS
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts via Selenium rendering with multiple chart containers,
+          stacked column histograms, but doesn't use highcharts-core Python library
+          as recommended
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/letsplot.yaml b/plots/scatter-matrix/metadata/letsplot.yaml
index a51f37b352..f381236c3e 100644
--- a/plots/scatter-matrix/metadata/letsplot.yaml
+++ b/plots/scatter-matrix/metadata/letsplot.yaml
@@ -24,3 +24,184 @@ review:
   - Wildcard import style could be cleaner with explicit imports
   - Could use geom_density() on diagonal for smoother distribution visualization as
     alternative
+  image_description: 'The plot displays a 4×4 scatter plot matrix showing pairwise
+    relationships between four Iris flower measurements: Sepal Length (Sepal Len),
+    Sepal Width (Sepal Wid), Petal Length (Petal Len), and Petal Width (Petal Wid).
+    The diagonal cells contain overlapping histograms showing the distribution of
+    each variable, while off-diagonal cells show scatter plots of variable pairs.
+    Three species are color-coded: Setosa (blue #306998), Versicolor (yellow #FFD43B),
+    and Virginica (red #DC2626). The title "scatter-matrix · letsplot · pyplots.ai"
+    appears at the top. A legend at the bottom identifies the three species. The plot
+    uses a white background with minimal grid styling. Variable names appear on both
+    the left y-axis labels and bottom x-axis labels of the matrix.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is bold and large (~32pt), axis labels are ~18pt, tick labels
+          ~14pt. All text is readable, though tick labels could be slightly larger
+          for optimal viewing at full resolution.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Labels and tick marks are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Point size (3.5) and alpha (0.7) work well for 150 data points.
+          Histograms are clear with good transparency. Minor: some scatter panels
+          show slight overplotting but still readable.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Red palette is distinguishable. Yellow and Red could
+          be slightly problematic for some colorblind users, but generally acceptable.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Matrix fills the canvas well with appropriate margins for title and
+          legend. Good proportions.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Sepal Len", "Petal Wid") but abbreviated
+          without units. Original column names had units "(cm)" but were shortened.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Minimal grid with minor gridlines removed. Legend is well-positioned
+          at bottom with horizontal layout.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot matrix with histograms on diagonal and scatter
+          plots off-diagonal.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All four variables correctly mapped across rows and columns.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diagonal distributions, pairwise scatter
+          plots, color encoding for species, variable labels on edges.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis ranges.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three species with matching colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact required format "scatter-matrix · letsplot · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows correlations between variables, distinct species clusters,
+          and varying distributions. Good separation between species visible. Minor:
+          could show outliers more explicitly.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses classic Iris flower dataset with realistic measurements based
+          on actual biological data distributions.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sepal/petal measurements are in realistic cm ranges. Some generated
+          values slightly extend beyond typical ranges but acceptable.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern. No functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses wildcard import `from lets_plot import *` with noqa comments.
+          Functional but not cleanest.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggbunch for combining plots, ggplot grammar of graphics, scale_color_manual/scale_fill_manual,
+          theme customization. Good use of lets-plot features but could leverage more
+          advanced features like geom_density for diagonal instead of histogram.
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/matplotlib.yaml b/plots/scatter-matrix/metadata/matplotlib.yaml
index 2933f672c9..df79b5b076 100644
--- a/plots/scatter-matrix/metadata/matplotlib.yaml
+++ b/plots/scatter-matrix/metadata/matplotlib.yaml
@@ -28,3 +28,176 @@ review:
     visibility given the data density
   - Could leverage more distinctive matplotlib features like shared axes linking or
     colorbar for additional encoding
+  image_description: 'The plot displays a 4×4 scatter plot matrix using the classic
+    Iris flower dataset structure with four measurements: Sepal Length (cm), Sepal
+    Width (cm), Petal Length (cm), and Petal Width (cm). The diagonal cells contain
+    overlapping histograms showing the distribution of each variable, with three species
+    color-coded: Setosa (blue #306998), Versicolor (yellow #FFD43B), and Virginica
+    (green #4CAF50). Off-diagonal cells show scatter plots for each variable pair,
+    clearly revealing the characteristic clustering patterns (Setosa well-separated
+    from the other two species, especially in petal measurements). The legend is positioned
+    in the top-right subplot. Axis labels with units appear on the left and bottom
+    edges. A subtle dashed grid (alpha=0.3) appears in each subplot. The title follows
+    the required format at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; layout is clean
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Markers at s=70 are appropriate for 150 points but could be
+          slightly larger for this data density; histograms have good alpha=0.7
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format (12x12) with tight spacing works well for matrix; plot
+          fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All labels include units: "Sepal Length (cm)" etc.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate (alpha=0.3), but legend placement in top-right
+          subplot partially overlaps with scatter points in that cell
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot matrix with diagonal histograms
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Pairwise relationships correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: diagonal distributions, off-diagonal
+          scatters, color encoding for groups, variable labels on edges'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species names correctly labeled with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"scatter-matrix · matplotlib · pyplots.ai" follows required format'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlations, clusters (Setosa separation), different distribution
+          shapes, varying relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris-like flower measurements are a classic, realistic example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values realistic for flower measurements (sepal 4-8cm, petal 0-7cm,
+          etc.)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, GridSpec - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic GridSpec with a manual loop; plt.subplots() would be
+          more efficient, and no advanced matplotlib features are leveraged
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/plotly.yaml b/plots/scatter-matrix/metadata/plotly.yaml
index 1057dcab4a..c6649dd0df 100644
--- a/plots/scatter-matrix/metadata/plotly.yaml
+++ b/plots/scatter-matrix/metadata/plotly.yaml
@@ -26,3 +26,175 @@ review:
   - Yellow color for Versicolor may have accessibility issues for some colorblind
     users
   - Legend overlaps slightly with the top-right scatter cell data area
+  image_description: 'The plot displays a 4×4 scatter plot matrix (SPLOM) visualizing
+    Iris-like flower measurement data. The diagonal contains overlapping histograms
+    showing distributions for each of the four variables (Sepal Length, Sepal Width,
+    Petal Length, Petal Width - all in cm units). Off-diagonal cells contain scatter
+    plots showing pairwise relationships. Three species are color-coded: Setosa (blue,
+    #306998), Versicolor (yellow, #FFD43B), and Virginica (red/coral, #E74C3C). The
+    legend is positioned in the upper-right corner with a "Species" title. The title
+    "scatter-matrix · plotly · pyplots.ai" appears centered at the top. A white/clean
+    template is used with subtle grid lines. Variable names appear as axis labels
+    on the left (y-axis) and bottom (x-axis) edges of the matrix.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable; slightly small
+          tick fonts on histogram cells
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized with good transparency; histogram
+          bars slightly overlap but distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, red palette is distinctive but not fully colorblind-safe
+          (yellow can be hard to distinguish)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format appropriate for matrix; good use of canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (cm) on all edge axes
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend well-placed but partially overlaps scatter
+          area in top-right cell
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot matrix/SPLOM
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All pairwise combinations correctly shown
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has histograms on diagonal, scatter on off-diagonal, color encoding
+          for categories
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species names match data correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-matrix · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlations, clusters, and distributions well; demonstrates
+          species separation clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris-like botanical measurement data is a classic, real-world example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for flower measurements; some generated values
+          slightly outside typical ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save; no unnecessary functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses graph_objects and make_subplots correctly, but could leverage
+          plotly.express scatter_matrix or more interactive features
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/plotnine.yaml b/plots/scatter-matrix/metadata/plotnine.yaml
index a5c9d26232..6f50da5bf0 100644
--- a/plots/scatter-matrix/metadata/plotnine.yaml
+++ b/plots/scatter-matrix/metadata/plotnine.yaml
@@ -24,3 +24,181 @@ review:
   weaknesses:
   - Uses warnings.filterwarnings("ignore") which could hide legitimate issues
   - Some data transformation loops add complexity (though necessary for facet approach)
+  image_description: 'The plot displays a 4×4 scatter matrix (pairplot) for iris-like
+    data with four variables: Petal Length (cm), Petal Width (cm), Sepal Length (cm),
+    and Sepal Width (cm). The diagonal cells show kernel density estimates (KDE) for
+    each variable. Off-diagonal cells show scatter plots for each variable pair. Data
+    is color-coded by Species with three groups: setosa (blue/Python Blue #306998),
+    versicolor (yellow/Python Yellow #FFD43B), and virginica (coral/salmon #E07A5F).
+    The title "scatter-matrix · plotnine · pyplots.ai" appears centered at the top
+    in bold. Variable names are displayed along the top edge (columns) and right edge
+    (rows) as strip labels. A legend showing Species with colored markers is positioned
+    on the right side. The plot has a clean minimal theme with subtle gridlines and
+    good use of transparency (alpha=0.7) for the scatter points.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is bold and readable, strip labels are clear, axis tick labels
+          are legible but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are appropriately sized with good alpha for the data density
+          (150 points), density plots are visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and coral provide excellent contrast and are distinguishable
+          for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with 4×4 grid filling the space well, though
+          bottom-left x-axis labels are slightly cramped
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Variable names include units "(cm)" and are descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend is well placed on right, but legend could
+          be slightly larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter matrix/pairplot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All pairwise combinations correctly shown
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diagonal KDE distributions, off-diagonal scatters, color encoding
+          by category
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all three species
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-matrix · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows correlations (positive between petal dimensions), clear species
+          separation, different distributions on diagonal
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Iris-like data is a classic, appropriate choice for demonstrating
+          scatter matrices
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for iris measurements (sepal 4-8cm, petal 0-7cm,
+          widths appropriate)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses some helper loops for data transformation which is necessary
+          for this complex plot, but mostly follows KISS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports are used, but warnings.filterwarnings could be avoided
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses warnings.filterwarnings("ignore") which suppresses potential
+          issues
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine's grammar of graphics with ggplot, facet_grid,
+          geom_point, geom_density, and theming system. The layered approach is idiomatic.
+          Could have explored additional plotnine features.
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/pygal.yaml b/plots/scatter-matrix/metadata/pygal.yaml
index d3629aa6bd..7f5a59f879 100644
--- a/plots/scatter-matrix/metadata/pygal.yaml
+++ b/plots/scatter-matrix/metadata/pygal.yaml
@@ -25,3 +25,177 @@ review:
   - Axis tick labels within cells are somewhat small relative to the overall image
     size
   - Could benefit from subtle grid lines within scatter cells for reference
+  image_description: 'The plot displays a 4×4 scatter matrix visualization of four
+    Iris-like dataset variables: Sepal Length, Sepal Width, Petal Length, and Petal
+    Width. The title "scatter-matrix · pygal · pyplots.ai" is centered at the top
+    in dark text. Each diagonal cell shows a histogram with blue bars representing
+    the univariate distribution of that variable. Off-diagonal cells display scatter
+    plots with blue dots (approximately dots_size=7) showing pairwise relationships
+    between variables. The scatter points have good transparency (opacity ~0.55),
+    making overlapping points distinguishable. Variable names appear as rotated labels
+    on the left edge and horizontal labels along the bottom. The cells have a light
+    gray (#f8f8f8) background. Clear correlation patterns are visible: positive correlations
+    between Sepal Length, Petal Length, and Petal Width; negative correlation between
+    Sepal Width and the others. The overall layout uses a 3600×3600 square format.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and variable labels are clearly readable; some axis tick labels
+          are slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter dots well-sized for 100 points; histograms clearly visible;
+          transparency handles overlapping well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) is accessible; no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; slight asymmetry in margins but overall well-balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Variable names present but no units (which is appropriate for Iris-like
+          data)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit grid lines; no legend needed for single series
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter matrix with histograms on diagonal
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All pairwise combinations correctly shown
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal histograms present; off-diagonal scatter plots present;
+          variable labels on edges
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend, but not needed for single-color data
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-matrix · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlations (positive and negative); shows distributions;
+          100 points is appropriate
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris-like dataset is a classic real-world example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for flower measurements
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses functions/classes implicitly through PIL composite approach;
+          more complex than typical but necessary for pygal
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' correctly but uses PIL composite approach
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of pygal.XY for scatter and pygal.Histogram for diagonal;
+          creative composite approach with cairosvg and PIL to overcome pygal's single-chart
+          limitation
+  verdict: APPROVED
diff --git a/plots/scatter-matrix/metadata/seaborn.yaml b/plots/scatter-matrix/metadata/seaborn.yaml
index 5c02c2c3ff..aeb1d393d7 100644
--- a/plots/scatter-matrix/metadata/seaborn.yaml
+++ b/plots/scatter-matrix/metadata/seaborn.yaml
@@ -26,3 +26,176 @@ review:
   - Accessing private attribute g._legend is not ideal; consider using legend_data
     parameter or seaborn move_legend function
   - Marker size (s=80) could be slightly smaller given the data density in some cells
+  image_description: 'The plot displays a 4×4 scatter plot matrix (pairplot) showing
+    pairwise relationships between four iris-like variables: Sepal Length (cm), Sepal
+    Width (cm), Petal Length (cm), and Petal Width (cm). The diagonal shows kernel
+    density estimates (KDE) with filled distributions for each variable. Three species
+    (Setosa, Versicolor, Virginica) are color-coded using a blue, yellow, and lighter
+    blue palette. Scatter points have white edge outlines and appropriate transparency.
+    The title "scatter-matrix · seaborn · pyplots.ai" appears at the top in bold.
+    A legend for species is positioned on the right side. Axis labels include units
+    (cm) and are clearly readable. The grid uses a subtle whitegrid style.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels are readable with units, tick
+          labels are appropriately sized. Slightly smaller than optimal for some tick
+          values.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Markers are visible with good alpha (0.7) and white edges, but with
+          150 points per species some cells show moderate overplotting
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/blue palette is distinguishable but Setosa (blue) and
+          Virginica (lighter blue) are somewhat similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, matrix fills space well, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All labels include units: "Sepal Length (cm)", "Petal Width (cm)",
+          etc.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle whitegrid style, legend well-placed with clear species labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot matrix with pairwise scatter plots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Four variables correctly mapped to grid positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Diagonal shows KDE, off-diagonal shows scatter, color encoding for
+          categories
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Species legend accurately labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "scatter-matrix · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows correlations, clusters, and distributions well; demonstrates
+          species separation clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris-like flower measurements are a classic, comprehensible dataset
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for iris measurements, though species means/stds
+          are approximations
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Accessing g._legend is accessing private attributes
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's pairplot which is ideal for this task, with KDE diagonals
+          and hue coloring. Could have used corner=True for more compact display or
+          regression lines.
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/altair.yaml b/plots/scatter-regression-linear/metadata/altair.yaml
index c48117560f..6d137748b2 100644
--- a/plots/scatter-regression-linear/metadata/altair.yaml
+++ b/plots/scatter-regression-linear/metadata/altair.yaml
@@ -24,3 +24,183 @@ review:
   weaknesses:
   - Could use Altair built-in transform_regression() for the regression line and band,
     though this would lose the ability to display R² and equation
+  image_description: The plot displays a scatter plot with linear regression showing
+    the relationship between Advertising Spend ($K) on the x-axis (ranging from 10
+    to 98) and Sales Revenue ($K) on the y-axis (ranging from approximately 10 to
+    110). Blue filled circular points (with moderate transparency ~0.65) show 80 data
+    points with clear positive correlation. A yellow/gold solid regression line runs
+    through the data from lower-left to upper-right. A semi-transparent yellow confidence
+    band surrounds the regression line, showing the 95% CI that widens slightly at
+    the extremes. The title "scatter-regression-linear · altair · pyplots.ai" appears
+    at the top-left. The regression equation "y = 0.84x + 12.85 | R² = 0.902" is displayed
+    prominently in the upper portion of the plot. Grid lines are subtle with low opacity.
+    The layout is well-balanced with good canvas utilization.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (~28pt), axis labels are clear (~22pt), tick labels
+          readable (~18pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=150) with good alpha (0.65), appropriate
+          for 80 data points. Could be slightly larger but still good.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points (#306998) and yellow line (#FFD43B) provide excellent
+          contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, plot fills appropriate space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Advertising Spend ($K)", "Sales
+          Revenue ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3) which is good. No legend needed for this
+          plot type. However, regression annotation is positioned well.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (advertising spend) and Y (sales revenue) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: R² displayed, regression line distinct,
+          95% CI band with transparency, points have alpha ~0.65, axis labels descriptive,
+          regression equation shown'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate scaling (zero=False)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; annotation serves this purpose well
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "scatter-regression-linear · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows strong linear relationship (R²=0.902) with realistic noise/scatter.
+          Good range of values. Could show slightly more variation in correlation
+          strength but still demonstrates concept well.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Advertising spend vs sales revenue is a classic, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are realistic: $10K-$100K advertising spend yielding $15K-$100K+
+          sales revenue'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → statistics → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png AND plot.html which is correct for Altair
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses Altair's declarative layering (alt.layer) and encoding system
+          well. Could leverage transform_regression() for cleaner implementation,
+          but manual calculation allows displaying R² which Altair doesn't provide
+          natively. Good tooltip support.
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/bokeh.yaml b/plots/scatter-regression-linear/metadata/bokeh.yaml
index cd7a411e55..7622d78165 100644
--- a/plots/scatter-regression-linear/metadata/bokeh.yaml
+++ b/plots/scatter-regression-linear/metadata/bokeh.yaml
@@ -25,3 +25,174 @@ review:
     to the statistical information
   - Does not leverage Bokeh distinctive interactive features (hover tooltips showing
     point values would enhance the visualization)
+  image_description: The plot displays a scatter chart with 80 blue data points showing
+    the relationship between Study Hours (x-axis, range 1-10) and Exam Score % (y-axis,
+    range ~35-105). A golden/yellow regression line runs diagonally from bottom-left
+    to top-right, surrounded by a light blue semi-transparent 95% confidence interval
+    band. The regression equation "y = 5.40x + 42.85" and "R² = 0.791" are annotated
+    in the upper-left area with a white background. The title "scatter-regression-linear
+    · bokeh · pyplots.ai" appears at the top. A legend in the bottom-right corner
+    identifies "Linear Regression" (line) and "Data Points" (scatter). The overall
+    layout uses a white background with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and annotation are readable; tick labels slightly
+          small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter points well-sized (size=18) with good alpha (0.65); could
+          be slightly larger for 80 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe and distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Study Hours", "Exam Score (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid alpha is good (0.3), but legend is positioned in bottom-right
+          corner which is far from the annotation; legend labels could be more descriptive
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (hours → scores)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: R², regression line, confidence band,
+          equation annotation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-linear · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows linear relationship with appropriate scatter/noise; R²=0.79
+          demonstrates good but not perfect fit
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a classic, relatable educational research
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Hours 1-10 reasonable; scores 35-105 mostly realistic though some
+          exceed 100%
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → regression → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and Band model correctly, but doesn't leverage
+          Bokeh's interactive features (tooltips, hover tools) which would be distinctive
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/highcharts.yaml b/plots/scatter-regression-linear/metadata/highcharts.yaml
index 035032667a..c73b9d0a51 100644
--- a/plots/scatter-regression-linear/metadata/highcharts.yaml
+++ b/plots/scatter-regression-linear/metadata/highcharts.yaml
@@ -25,3 +25,179 @@ review:
   weaknesses:
   - Legend positioned in top-right corner with some wasted space; could be integrated
     more tightly with the plot area
+  image_description: 'The plot displays a scatter plot with linear regression showing
+    the relationship between Advertising Spend ($K) on the x-axis (ranging from 10-98)
+    and Sales Revenue ($K) on the y-axis (ranging from 10-280). Blue circular data
+    points (80 points with ~0.7 alpha) are scattered around a bright yellow regression
+    line. A light gray/blue semi-transparent 95% confidence interval band surrounds
+    the regression line. The title "scatter-regression-linear · highcharts · pyplots.ai"
+    is prominently displayed at the top in bold. Below it, a subtitle shows the regression
+    equation "y = 2.54x + 12.85 | R² = 0.988". A legend in the top-right corner identifies
+    the three series: 95% Confidence Interval, Regression Line, and Data Points. The
+    grid lines are subtle (light gray), and the overall layout is clean with good
+    use of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 56px, subtitle at 40px, axis
+          labels at 36px, tick labels at 28px, legend at 28px'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, everything is well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers at radius 12 are well-sized for 80 data points, alpha at
+          0.7 is appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills majority of canvas, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Advertising Spend ($K)" and "Sales
+          Revenue ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle at alpha 0.1, but legend could be positioned better
+          (floating in top-right with some empty space)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (advertising spend) and Y (sales revenue) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: scatter points, regression line,
+          confidence interval, R², regression equation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe all three series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-linear · highcharts · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear linear relationship with appropriate scatter/noise demonstrating
+          regression fit
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Advertising spend vs sales revenue is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are realistic: $10-100K ad spend generating $30-270K revenue'
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: While there are no functions/classes, the code structure is more
+          complex due to Highcharts requirements (downloading JS, HTML generation,
+          Selenium)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (extra file is acceptable but noted)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Uses Highcharts-specific features: AreaRangeSeries for CI band,
+          proper series layering, subtitle for equation. Could have used more interactive
+          features like tooltips with data values.'
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/letsplot.yaml b/plots/scatter-regression-linear/metadata/letsplot.yaml
index 6d3f0f6509..6f789e9929 100644
--- a/plots/scatter-regression-linear/metadata/letsplot.yaml
+++ b/plots/scatter-regression-linear/metadata/letsplot.yaml
@@ -27,3 +27,178 @@ review:
   - Wildcard import pattern (from lets_plot import *) is necessary but not ideal
   - Points could be slightly larger (size=6-7) for optimal visibility at 100 data
     points
+  image_description: The plot displays a scatter plot with linear regression showing
+    the relationship between Advertising Spend ($K) on the x-axis (ranging from 10
+    to 100) and Sales Revenue ($K) on the y-axis (ranging from approximately 60 to
+    320). Data points are rendered as blue circles (#306998) with moderate transparency.
+    A yellow/gold regression line (#FFD43B) runs diagonally from bottom-left to top-right
+    with a semi-transparent confidence band (95% CI) in matching yellow. The regression
+    equation "y = 2.37x + 56.7" and "R² = 0.888" are annotated in the upper-left portion
+    of the plot area. The title "scatter-regression-linear · letsplot · pyplots.ai"
+    appears at the top. Grid lines are dashed and subtle gray. The overall layout
+    is clean and well-proportioned.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and annotation text are all clearly
+          readable at appropriate sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; annotation is well-positioned in the
+          upper-left away from data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are appropriately sized with good alpha (0.65) for 100 data
+          points; slightly smaller than optimal but still visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills the canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Advertising Spend ($K)" and "Sales
+          Revenue ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid uses dashed lines but no legend present (none needed for this
+          plot type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (advertising spend → sales revenue)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: regression line, 95% CI band, R² display,
+          regression equation, appropriate alpha'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; annotation with equation/R² serves as reference
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "scatter-regression-linear · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear linear relationship with noise demonstrating regression
+          fit and confidence intervals well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Real-world scenario: advertising spend vs sales revenue is a classic
+          business analytics use case'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible ($10K-$100K ad spend, $60K-$320K revenue) though
+          slightly high revenue for some ad spend levels
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple structure: imports → data → calculations → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Fixed seed: np.random.seed(42)'
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Imports are functional but uses wildcard import with noqa comments
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_smooth with method="lm", layer_tooltips for interactivity,
+          ggsize for proper sizing; good use but could leverage more lets-plot specific
+          theming
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/matplotlib.yaml b/plots/scatter-regression-linear/metadata/matplotlib.yaml
index 55feaa9740..be8c6d7a60 100644
--- a/plots/scatter-regression-linear/metadata/matplotlib.yaml
+++ b/plots/scatter-regression-linear/metadata/matplotlib.yaml
@@ -26,3 +26,175 @@ review:
   - Axis labels lack units (could be "Study Hours (hrs)" and "Exam Score (points)")
   - Manual regression calculation rather than using scipy or numpy polyfit - works
     correctly but not leveraging library ecosystem
+  image_description: 'The plot shows a scatter plot with linear regression on a white
+    background. Blue circular markers (with white edge) represent 80 data points showing
+    the relationship between Study Hours (x-axis, range 1-10) and Exam Score (y-axis,
+    range ~30-100). A prominent red regression line runs diagonally from bottom-left
+    to top-right with a semi-transparent yellow/gold 95% confidence interval band
+    surrounding it. The title reads "scatter-regression-linear · matplotlib · pyplots.ai"
+    at the top. An annotation box in the upper-left corner displays the regression
+    equation "y = 6.32x + 33.12" and "R² = 0.848". A legend in the lower-right shows
+    "95% CI" and "Regression Line". Axis labels are clear: "Study Hours" (x) and "Exam
+    Score" (y). The grid is subtle with dashed lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers sized at s=150 with alpha=0.7, appropriate for 80 points
+          (guideline: 100-200 for 30-100 pts)'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and red (#E74C3C) with yellow CI band - good contrast,
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels "Study Hours" and "Exam Score" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha=0.3, dashed), legend well-placed in lower-right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (study hours) and Y (exam scores) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: R² displayed, regression line distinct, 95% CI band with transparency,
+          points have alpha
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels CI and regression line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows linear correlation with noise, demonstrates regression fit
+          and confidence intervals
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a classic, comprehensible educational
+          research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Study hours 1-10, exam scores 20-100 with base 35 + 6pts/hour is
+          realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → regression → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Manual regression calculation instead of using scipy.stats.linregress
+          or numpy.polyfit; could use matplotlib's built-in annotation features more
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/plotly.yaml b/plots/scatter-regression-linear/metadata/plotly.yaml
index 43ee121e23..b404d1cf56 100644
--- a/plots/scatter-regression-linear/metadata/plotly.yaml
+++ b/plots/scatter-regression-linear/metadata/plotly.yaml
@@ -24,3 +24,182 @@ review:
   - Does not use plotly express trendline features (px.scatter with trendline="ols")
     which would be more idiomatic
   - Legend positioned in lower-right could overlap with data in some scenarios
+  image_description: 'The plot displays a scatter chart showing the relationship between
+    Advertising Spend (x-axis, in thousands $, ranging from ~5 to 105) and Sales Revenue
+    (y-axis, in thousands $, ranging from ~50 to 300). Blue circular markers with
+    a dark blue outline represent 80 data points with moderate transparency (~0.65
+    alpha). A prominent yellow/gold regression line runs through the data showing
+    a strong positive linear relationship. A light blue semi-transparent confidence
+    interval band surrounds the regression line. The title "scatter-regression-linear
+    · plotly · pyplots.ai" is centered at the top. An annotation box in the upper-left
+    corner displays the regression equation (y = 2.56x + 26.77), R² = 0.974, and r
+    = 0.987. A legend in the lower-right corner shows three items: 95% CI, Data points,
+    and Regression (R² = 0.974). The plot uses a clean white template with subtle
+    gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, annotation
+          at 20pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, annotation and legend positioned clearly
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 14 with 0.65 alpha is well-suited for 80 data points,
+          regression line clearly visible at width 4
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins (l=80, r=60, t=100,
+          b=80)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Advertising Spend (thousands $)"
+          and "Sales Revenue (thousands $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.1), but legend placement in lower-right creates
+          slight visual imbalance; could be better positioned
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression line
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (advertising spend) and Y (sales revenue) correctly assigned as
+          independent/dependent variables
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: R² displayed, regression line distinct,
+          95% CI band with transparency, point transparency, regression equation annotation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify 95% CI, Data points, and Regression
+          with R² value
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "scatter-regression-linear · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows linear relationship with noise, demonstrates regression fit,
+          confidence interval widens appropriately at edges
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Advertising spend vs sales revenue is a classic, realistic business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are reasonable but the very high R² (0.974) represents an
+          unusually clean relationship; real-world data typically has more scatter
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → calculations → figure → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html for interactivity)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses plotly's graph_objects and annotations well, but doesn't leverage
+          plotly's trendline capabilities via px.scatter(trendline) or hover customization
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/plotnine.yaml b/plots/scatter-regression-linear/metadata/plotnine.yaml
index f9f75749a8..46d840c85f 100644
--- a/plots/scatter-regression-linear/metadata/plotnine.yaml
+++ b/plots/scatter-regression-linear/metadata/plotnine.yaml
@@ -23,3 +23,174 @@ review:
   - Well-configured theme with proper font sizes for 4800×2700 output
   weaknesses:
   - Grid lines appear slightly more visible than ideal despite alpha settings
+  image_description: The plot displays a scatter plot with linear regression showing
+    the relationship between Study Hours (x-axis) and Exam Score in percentage (y-axis).
+    Blue scatter points (~80 data points) are distributed across the plot showing
+    a clear positive linear correlation. A golden/yellow regression line runs diagonally
+    from bottom-left to top-right, with a semi-transparent yellow confidence band
+    around it. The title "scatter-regression-linear · plotnine · pyplots.ai" appears
+    at the top. The regression equation (y = 5.26x + 43.53) and R² = 0.871 are annotated
+    in the upper-left area. The background uses a minimal theme with subtle gray grid
+    lines. The layout is clean with good 16:9 proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized with good alpha (0.65), though could be slightly
+          larger for 80 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Perfect 16:9 layout with balanced margins and good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Study Hours" and "Exam Score (%)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but grid could be more subtle (alpha values set
+          but appear slightly visible)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (study hours → exam score)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: R² displayed, regression line distinct,
+          confidence band with transparency, points with alpha, equation annotation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: `scatter-regression-linear · plotnine · pyplots.ai`'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear linear relationship with scatter around the line, demonstrates
+          regression fit well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam score is a classic educational research scenario
+          from the spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Study hours 1-10, exam scores 40-100% are realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s ggplot2 grammar: `geom_smooth(method="lm",
+          se=True)` for regression, `theme_minimal()`, proper `element_text()` sizing,
+          `annotate()` for statistical text'
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/pygal.yaml b/plots/scatter-regression-linear/metadata/pygal.yaml
index 7c68518bfc..2f97baa885 100644
--- a/plots/scatter-regression-linear/metadata/pygal.yaml
+++ b/plots/scatter-regression-linear/metadata/pygal.yaml
@@ -25,3 +25,173 @@ review:
   - Regression line could be more visually prominent against the CI band
   - The regression equation could be placed as a separate annotation rather than in
     the legend for better visibility
+  image_description: 'The plot displays a scatter plot with 80 blue data points showing
+    the relationship between Study Hours (x-axis, range 1-10) and Exam Score (y-axis,
+    range ~37-110). A green/olive linear regression line runs through the data with
+    a semi-transparent salmon/red 95% confidence interval band surrounding it. The
+    legend in the top-left corner shows three entries: "Data Points" (blue), "95%
+    CI Band" (red), and "Regression: y = 5.40x + 42.8 (R² = 0.791)" (green). The title
+    "scatter-regression-linear · pygal · pyplots.ai" appears at the top. The plot
+    has a clean white background with subtle grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable; font sizes are appropriate for
+          the canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points are well-sized, regression line visible; slight deduction
+          as the regression line could be thicker for better visibility against the
+          CI band
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, red/salmon, and green colors are distinguishable and colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Study Hours", "Exam Score") but missing units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle; legend placement in corner is functional but the
+          equation in legend is unconventional
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (Study Hours vs Exam Score)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has regression line, R² value, confidence interval band, all spec
+          requirements met
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all elements
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but regression equation shown in legend instead
+          of as separate annotation
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows linear relationship with scatter and noise; could show more
+          varied residual patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a realistic educational scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible (1-10 study hours, ~40-110 exam scores)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple script structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pygal, Style, scipy.stats)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also generates plot.html (minor issue)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart, custom Style, fill for CI band; could better leverage
+          pygal's interactivity
+  verdict: APPROVED
diff --git a/plots/scatter-regression-linear/metadata/seaborn.yaml b/plots/scatter-regression-linear/metadata/seaborn.yaml
index 85cc66a697..47d7bb7904 100644
--- a/plots/scatter-regression-linear/metadata/seaborn.yaml
+++ b/plots/scatter-regression-linear/metadata/seaborn.yaml
@@ -22,3 +22,171 @@ review:
   weaknesses:
   - X-axis label Study Hours could include units like Study Hours (h) for full marks
     on VQ-06
+  image_description: 'The plot shows a scatter plot with blue circular markers (color
+    #306998) representing exam scores vs study hours. A golden yellow regression line
+    (#FFD43B) runs through the data with a light yellow semi-transparent 95% confidence
+    interval band. The title "scatter-regression-linear · seaborn · pyplots.ai" appears
+    at the top in large font. The x-axis is labeled "Study Hours" (range 0-11) and
+    the y-axis is labeled "Exam Score (%)" (range 30-100). A white annotation box
+    in the upper left shows the regression equation (y = 5.30x + 43.2) and R² = 0.800.
+    The plot has a subtle gray dashed grid, and the data points show a clear positive
+    correlation with appropriate scatter around the regression line.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size s=150 and alpha=0.65 perfect for 80 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Study Hours" is descriptive but lacks units; "Exam Score (%)" has
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but annotation box could have slightly
+          better placement
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with linear regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=study hours, Y=exam scores correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has regression line, confidence band, R² displayed, equation annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, annotation is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: scatter-regression-linear · seaborn · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows regression relationship well, but all points are positively
+          correlated (could show some interesting outliers)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Study hours vs exam scores is a classic, comprehensible educational
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Hours 1-10 and scores 30-100% are realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: sns.regplot with ci=95 is the quintessential seaborn feature for
+          regression plots
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/altair.yaml b/plots/scatter-regression-lowess/metadata/altair.yaml
index d890d1993a..9858342231 100644
--- a/plots/scatter-regression-lowess/metadata/altair.yaml
+++ b/plots/scatter-regression-lowess/metadata/altair.yaml
@@ -25,3 +25,175 @@ review:
   - No interactive features like tooltips or zoom added despite Altair strength in
     interactivity
   - Markers use outline style which can be slightly less visible than filled markers
+  image_description: 'The plot shows a scatter plot with LOWESS regression on a white/light
+    gray background. Blue circular markers (outline only, Python blue #306998) represent
+    200 data points showing a complex non-linear pattern - an initial rise, a peak
+    around x=2.5-3, a dip around x=5-6, then a steep rise towards x=10. A thick yellow/gold
+    LOWESS curve (#FFD43B) smoothly traces through the data, capturing the sine wave
+    + linear trend pattern. The title "scatter-regression-lowess · altair · pyplots.ai"
+    appears at the top in black text. X-axis is labeled "Independent Variable (x)"
+    ranging 0-10, Y-axis is labeled "Dependent Variable (y)" ranging -1 to 9. Subtle
+    grid lines are visible. The plot uses good canvas utilization with proper margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, ticks at 18pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized (size=100) with good opacity (0.6) for 200 points;
+          LOWESS line prominent with strokeWidth=4
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is good and colorblind-safe; outline-only markers
+          could be slightly harder to see than filled
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, plot fills appropriate
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (generic "Independent Variable (x)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at 0.3 opacity; no legend needed for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter + LOWESS regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: LOWESS curve visually distinct (yellow, thick line), moderate smoothing
+          (frac=0.3), transparency on points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; would be redundant
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-lowess · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows non-linear relationship with varying patterns across x-range;
+          sine wave + trend demonstrates LOWESS flexibility
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic but plausible mathematical relationship; could represent
+          real-world non-linear data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values sensible and appropriate for demonstration
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas, statsmodels lowess)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also plot.html (correct for Altair)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses Altair's declarative layering well; however, LOWESS is computed
+          externally via statsmodels rather than using native Altair transforms. This
+          is acceptable since Altair doesn't have built-in LOWESS, but no interactive
+          features (tooltips, zoom) were added.
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/bokeh.yaml b/plots/scatter-regression-lowess/metadata/bokeh.yaml
index 374ad40313..ac5e7ec3cf 100644
--- a/plots/scatter-regression-lowess/metadata/bokeh.yaml
+++ b/plots/scatter-regression-lowess/metadata/bokeh.yaml
@@ -26,3 +26,181 @@ review:
     axis labels)
   - Could add HoverTool to show individual point values in interactive version
   - Grid dashed style may be unnecessary - solid subtle lines would be cleaner
+  image_description: The plot displays a scatter plot with 200 blue data points (#306998)
+    showing enzyme activity (%) vs temperature (°C). A smooth yellow/gold LOWESS curve
+    (#FFD43B) is overlaid, clearly showing the characteristic bell-shaped enzyme activity
+    pattern - activity rises from ~20% at 10°C, peaks at approximately 75% around
+    35°C, then decreases back to ~30% at 50°C. The title "scatter-regression-lowess
+    · bokeh · pyplots.ai" appears in the top left. Axis labels include units (°C and
+    %). A legend in the top right identifies "Data Points" and "LOWESS Fit". The grid
+    uses subtle dashed lines. The overall layout is clean with good use of canvas
+    space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable, slightly larger than minimum requirements
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers (size=18) and line (width=5) are well-sized for 200 points;
+          alpha=0.6 appropriate; minor: markers could be slightly larger'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills majority of space; slight imbalance
+          with legend in far corner
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Enzyme Activity (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle with alpha=0.3 and dashed style; legend positioned well
+          but could be larger for the canvas size
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: scatter plot with LOWESS regression curve'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=temperature (independent), Y=enzyme activity (dependent) correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: LOWESS curve distinct from points, moderate
+          smoothing (frac=0.4), transparency on points, axis labels, descriptive title'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points and full LOWESS curve
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Data Points" and "LOWESS Fit"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "scatter-regression-lowess · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows complex non-linear relationship with rising, peak,
+          and declining phases - perfect for demonstrating LOWESS capabilities'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: 'Enzyme activity vs temperature is a well-known biological phenomenon;
+          the bell curve is scientifically accurate; minor: could add more context'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature range 10-50°C realistic for enzyme studies; activity
+          0-100% appropriate; peak at ~35°C matches typical enzyme optima
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → LOWESS calculation → plot → styling
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports are used: numpy, bokeh modules, statsmodels lowess'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using `p.scatter()` which is current, but CDN import for save is
+          older pattern
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource properly, figure configuration, and both PNG
+          and HTML export; could leverage more Bokeh-specific features like HoverTool
+          for interactivity
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/highcharts.yaml b/plots/scatter-regression-lowess/metadata/highcharts.yaml
index f5b59e1be9..048f5a190d 100644
--- a/plots/scatter-regression-lowess/metadata/highcharts.yaml
+++ b/plots/scatter-regression-lowess/metadata/highcharts.yaml
@@ -23,3 +23,186 @@ review:
   - Axis labels are generic (X Value, Y Value) rather than context-specific
   - Custom function definition in code deviates from KISS principle (though necessary)
   - Data could be tied to a more realistic scenario with meaningful labels
+  image_description: 'The plot displays a scatter plot with LOWESS regression on a
+    white background. Blue circular markers (with transparency and blue outline) represent
+    200 data points spread across X values 0-10 and Y values approximately -6 to 25.
+    A smooth yellow/gold LOWESS curve runs through the data, showing a non-linear
+    pattern: starting around y=1, dipping slightly around x=3.5 (to ~-0.5), then rising
+    continuously to about y=21 at x=10. The title "scatter-regression-lowess · highcharts
+    · pyplots.ai" appears at the top in bold, with a subtitle "Non-linear Trend with
+    LOWESS Smoothing (frac=0.3)". Axis labels show "X Value" and "Y Value". A legend
+    in the upper right shows "Data Points" (blue circle) and "LOWESS Curve" (yellow
+    line). Subtle grid lines are visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick marks are all clearly readable
+          at full size. Text sizes are appropriately scaled for the large canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. All labels, tick marks, and legend
+          are cleanly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Markers are well-sized (radius 10) with good alpha transparency
+          (0.6). The LOWESS curve stands out clearly at lineWidth 6. Minor: Some points
+          in dense areas slightly overlap each other.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) for points and yellow (#FFD43B) for the curve provide
+          excellent contrast and are colorblind-safe (blue-yellow is the safest combination).
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization with proper margins. Plot area is well-proportioned.
+          Minor: Legend could be positioned slightly closer to the plot area.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"X Value" and "Y Value" are descriptive but lack units or context
+          (e.g., could be more meaningful like "Measurement Index" or include units).'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.1. Legend is well-placed in upper right
+          with clear labels.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression overlay.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned, LOWESS curve properly overlaid.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: scatter points with transparency, distinct
+          LOWESS curve (solid yellow line), moderate smoothing bandwidth (frac=0.3),
+          descriptive title.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible, axes appropriately scaled.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels "Data Points" and "LOWESS Curve".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title correctly uses format "scatter-regression-lowess · highcharts
+          · pyplots.ai".
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Data shows complex non-linear relationship (sine + quadratic) that
+          varies across x-axis range. LOWESS effectively captures local trends. Minor:
+          Could show more dramatic non-linearity.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Data is mathematically generated showing a plausible complex pattern.
+          Generic "X Value"/"Y Value" labels reduce context. Could be more anchored
+          to a real-world scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible and demonstrate the intended pattern well.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Code includes a custom `lowess()` function definition. While necessary
+          (Highcharts lacks built-in LOWESS), this deviates from the KISS principle
+          of no functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` (and `plot.html` for interactive version).
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good use of Highcharts features: SplineSeries for smooth curve rendering,
+          interactive legend, Chart/HighchartsOptions pattern, Selenium export pattern
+          as documented.'
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/letsplot.yaml b/plots/scatter-regression-lowess/metadata/letsplot.yaml
index 0593ad4269..7dd2efffa4 100644
--- a/plots/scatter-regression-lowess/metadata/letsplot.yaml
+++ b/plots/scatter-regression-lowess/metadata/letsplot.yaml
@@ -24,3 +24,179 @@ review:
   - Confidence band fill color (#FFD43B) same as line color - could use a lighter/different
     shade for better visual distinction
   - Some imports listed individually could be consolidated (minor style issue)
+  image_description: 'The plot displays a scatter plot with approximately 200 blue
+    data points (#306998) representing plant growth rate vs temperature. A smooth
+    yellow LOWESS curve (#FFD43B) is overlaid with a light yellow confidence band
+    (se=True). The curve shows a clear non-linear relationship: growth rate increases
+    from ~17 cm/day at 5°C to a peak of ~22 cm/day around 15-17°C, then gradually
+    declines back to ~18 cm/day at 40°C. The title "scatter-regression-lowess · letsplot
+    · pyplots.ai" is properly formatted at the top. Axis labels include units: "Temperature
+    (°C)" and "Growth Rate (cm/day)". The minimal theme provides a clean white background
+    with subtle gray grid lines. Points have appropriate alpha (0.6) showing density
+    while allowing the LOWESS curve to be clearly visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks all clearly readable at full size
+          with appropriate font sizes (24pt title, 20pt axis labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Points well-sized (size=4) with good alpha (0.6), LOWESS line clearly
+          visible (size=2.5). Minor deduction: some points at edges slightly small
+          relative to canvas'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow curve provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills most of the space with balanced
+          margins. Minor whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Temperature (°C)" and "Growth Rate
+          (cm/day)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Minimal theme with subtle grid, no legend needed for this single-series
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=temperature, Y=growth rate correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: LOWESS curve distinct from points (solid yellow vs blue), confidence
+          band included, moderate smoothing (span=0.4), appropriate transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range displayed (5-40°C, 14-27 cm/day)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, single series clearly distinguished by color
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correctly formatted: "scatter-regression-lowess · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows non-linear relationship with varying pattern across x-range
+          (rise, peak, decline). Minor: could show more extreme outliers'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth vs temperature is a real, neutral scientific scenario
+          that naturally exhibits non-linear patterns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature 5-40°C is realistic for plant studies. Growth rate 14-27
+          cm/day is on the high end but plausible for fast-growing plants
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but some could be condensed (minor)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_smooth with loess method and se=True for confidence bands,
+          theme_minimal(), element_text() for styling. Good usage but not exceptional
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/matplotlib.yaml b/plots/scatter-regression-lowess/metadata/matplotlib.yaml
index 57b2012de0..8e2de64646 100644
--- a/plots/scatter-regression-lowess/metadata/matplotlib.yaml
+++ b/plots/scatter-regression-lowess/metadata/matplotlib.yaml
@@ -26,3 +26,169 @@ review:
   - Axis labels lack units (e.g., could be "Measurement Index (samples)" or similar)
   - Data context is generic "Measurement Index" / "Response Value" - could be more
     domain-specific
+  image_description: The plot displays a scatter plot with 200 blue data points (#306998)
+    with white edges and moderate transparency (alpha 0.6). A thick yellow/gold LOWESS
+    curve (#FFD43B, linewidth 4) smoothly traces through the data, capturing the non-linear
+    pattern. The data shows a relatively flat region from x=0-5, followed by an accelerating
+    upward trend from x=5-10. The title "scatter-regression-lowess · matplotlib ·
+    pyplots.ai" appears at the top. X-axis is labeled "Measurement Index" (0-10),
+    Y-axis is labeled "Response Value" (approximately -1 to 17.5). A legend in the
+    upper left identifies "Data points" and "LOWESS fit". A subtle dashed grid (alpha
+    0.3) aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 39
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size s=100 and alpha=0.6 appropriate for 200 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but missing units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.3), well-placed legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: distinct LOWESS curve, transparency,
+          moderate smoothing (frac=0.3)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies data points and LOWESS fit
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses {spec-id} · {library} · pyplots.ai
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Demonstrates LOWESS capability with complex non-linear pattern (flat
+          + accelerating regions)
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Generic measurement scenario, plausible but could be more concrete
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible values for measurement data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, simple flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs only
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses axes methods correctly, but no special matplotlib features beyond
+          basic plotting
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/plotly.yaml b/plots/scatter-regression-lowess/metadata/plotly.yaml
index 1ef499c4e4..c081516f35 100644
--- a/plots/scatter-regression-lowess/metadata/plotly.yaml
+++ b/plots/scatter-regression-lowess/metadata/plotly.yaml
@@ -25,3 +25,179 @@ review:
     labels)
   - Manual LOWESS implementation is educational but verbose; could leverage statsmodels
     for production code
+  image_description: 'The plot displays a scatter plot with LOWESS regression on a
+    white background with a subtle grid. The title "scatter-regression-lowess · plotly
+    · pyplots.ai" is centered at the top in dark gray text. Blue scatter points (Python
+    Blue #306998) with moderate transparency (opacity 0.6) show a complex non-linear
+    relationship - relatively flat from x=0 to x=4, then curving upward steeply from
+    x=5 to x=10. A smooth yellow/gold LOWESS curve (Python Yellow #FFD43B) with width
+    4 overlays the data, clearly showing the underlying trend. The legend is positioned
+    in the upper-left corner with "Data Points" and "LOWESS Curve" entries on a semi-transparent
+    white background. X-axis is labeled "X Value" (0-10), Y-axis is labeled "Y Value"
+    (-5 to 20). The layout uses a clean plotly_white template with light gray gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized (size=10), good alpha (0.6) for 200 points; LOWESS
+          line clearly visible with width=4
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and Yellow (#FFD43B) have excellent contrast and are
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good use of 16:9 aspect
+          ratio
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels "X Value" and "Y Value" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with rgba(0,0,0,0.1), legend well-placed in upper-left
+          with semi-transparent background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped on respective axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: LOWESS curve distinct from points (solid
+          yellow line vs blue markers), moderate smoothing (frac=0.3), moderate transparency
+          on points (0.6), descriptive title mentioning LOWESS'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points without cutoff
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Data Points" and "LOWESS Curve"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "scatter-regression-lowess · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows complex non-linear relationship with varying patterns
+          across x-range (flat region, then steep curve), demonstrating why LOWESS
+          is valuable'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Generic but plausible mathematical/scientific scenario; could benefit
+          from a real-world context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible values, good spread showing the non-linear relationship
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → LOWESS calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: false
+        comment: Saves as plot.png ✓ (already counted)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Figure, go.Scatter with proper marker/line styling, interactive
+          HTML export; however, implements LOWESS manually instead of using statsmodels.api.nonparametric.lowess
+          which would be more idiomatic for Python
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/plotnine.yaml b/plots/scatter-regression-lowess/metadata/plotnine.yaml
index 50cc82b99b..6631130976 100644
--- a/plots/scatter-regression-lowess/metadata/plotnine.yaml
+++ b/plots/scatter-regression-lowess/metadata/plotnine.yaml
@@ -24,3 +24,178 @@ review:
     awkward for a yield that should be positive
   - The span parameter in geom_smooth may not be correctly controlling the LOWESS
     bandwidth
+  image_description: The plot displays a scatter plot with 150 data points in a soft
+    blue color (#306998) with 0.6 alpha transparency. A prominent yellow/gold (#FFD43B)
+    LOWESS regression curve smoothly fits through the data, showing a clear non-linear
+    relationship between Temperature (°C) on the x-axis (ranging from ~5 to 35) and
+    Crop Yield (tons/hectare) on the y-axis (ranging from ~0 to ~90). The curve demonstrates
+    a classic optimal temperature response - yields are low at cold temperatures (~5°C),
+    rise through the middle range, peak around 20-22°C at ~78 tons/hectare, then decline
+    at higher temperatures. The title "scatter-regression-lowess · plotnine · pyplots.ai"
+    appears at the top. The plot uses a minimal theme with a clean white background
+    and subtle grid lines. Axis labels include units (°C, tons/hectare). The 16:9
+    landscape layout is well-proportioned.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at ~24pt, axis labels at ~20pt, tick labels at ~16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points sized well (size=3), alpha=0.6 appropriate for 150 points,
+          LOWESS curve stands out clearly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow curve provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but the LOWESS curve extends slightly below y=0
+          at the left edge which is a minor visual issue
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Temperature (°C)" and "Crop Yield (tons/hectare)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with minimal theme, no legend needed for this plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (temperature) and Y (yield) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Scatter points with transparency, LOWESS curve with contrasting color,
+          smooth curve without overfitting
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "scatter-regression-lowess · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows non-linear relationship well; data shows the characteristic
+          bell curve but could show more local variation visible in the LOWESS curve
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Excellent agricultural scenario (crop yield vs temperature) - scientifically
+          plausible and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature 5-35°C is realistic agricultural range, yields 0-90 tons/hectare
+          plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses `geom_smooth(method="lowess")` which is a nice plotnine/ggplot2
+          feature, but `span` parameter may not be the correct parameter name (it
+          should be `frac` for LOWESS in statsmodels). However, the curve appears
+          to work.
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/pygal.yaml b/plots/scatter-regression-lowess/metadata/pygal.yaml
index 77ace7e58c..1eb12c026b 100644
--- a/plots/scatter-regression-lowess/metadata/pygal.yaml
+++ b/plots/scatter-regression-lowess/metadata/pygal.yaml
@@ -26,3 +26,176 @@ review:
   - Axis labels in rendered output are generic (X Value, Y Value) rather than the
     domain-specific labels defined in the code
   - Legend entries in output differ from code-defined labels
+  image_description: 'The plot displays a scatter plot with blue circular data points
+    and a smooth yellow/gold LOWESS regression curve on a white background. The title
+    reads "scatter-regression-lowess · pygal · pyplots.ai" at the top. The X-axis
+    is labeled "X Value" (ranging from 0 to 10) and the Y-axis is labeled "Y Value"
+    (ranging from approximately -2 to 13). The legend at the bottom shows two entries:
+    "Data Points" (blue) and "LOWESS Curve (frac=0.35)" (yellow). The LOWESS curve
+    smoothly captures the non-linear trend in the data, starting low, rising gradually
+    in the middle, dipping slightly around x=5, then rising steeply toward x=10. Grid
+    lines are subtle and dotted.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable, tick labels are clear. Slightly
+          smaller than optimal for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter points and LOWESS curve are clearly visible and well-sized
+          for the data density. Points could be slightly larger.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow curve provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, minor whitespace imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive ("X Value", "Y Value") but lack units. The
+          code specifies domain-specific labels which don't appear in the rendered
+          output.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. Legend placement at bottom is functional
+          but legend text could be clearer.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression overlay
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned, LOWESS curve properly overlaid
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: scatter points, LOWESS curve with distinct
+          color, appropriate smoothing (frac=0.35)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies data points and LOWESS curve with parameters
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-lowess · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows non-linear relationship well with the curve capturing local
+          trends. Good demonstration of LOWESS smoothing behavior.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: The code describes a drug dose-response scenario which is scientifically
+          plausible, though the rendered axis labels are generic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Data values are realistic and sensible for the demonstrated relationship
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pygal, Style, statsmodels lowess)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart, custom Style for fonts/colors, legend_at_bottom,
+          stroke_style. Good use of pygal capabilities but could leverage more SVG
+          interactivity features.
+  verdict: APPROVED
diff --git a/plots/scatter-regression-lowess/metadata/seaborn.yaml b/plots/scatter-regression-lowess/metadata/seaborn.yaml
index bdf25e7a14..7e11a75419 100644
--- a/plots/scatter-regression-lowess/metadata/seaborn.yaml
+++ b/plots/scatter-regression-lowess/metadata/seaborn.yaml
@@ -24,3 +24,172 @@ review:
   - Axis labels are generic (X Value, Y Value) rather than contextual
   - The synthetic data could show more distinct local variations to better demonstrate
     LOWESS local-fitting capabilities
+  image_description: The plot displays a scatter plot with LOWESS regression on a
+    16:9 canvas. Blue scatter points (#306998) with alpha 0.6 are distributed showing
+    a complex non-linear relationship - relatively flat oscillation from x=0-5, then
+    rising steeply from x=5-10. A bright yellow/gold LOWESS curve (#FFD43B) with linewidth
+    4 smoothly traces through the data, capturing the non-linear trend. The title
+    "scatter-regression-lowess · seaborn · pyplots.ai" is at the top. Axis labels
+    are "X Value" and "Y Value" in large font. A subtle dashed grid with alpha 0.3
+    provides reference. The plot uses good space, with balanced margins.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized at s=100, alpha=0.6 is appropriate for 200
+          points. Slightly dense in some areas but still clear.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow line provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (generic "X Value" / "Y Value")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha 0.3, no legend needed for this single-series
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with LOWESS regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X and Y correctly assigned to axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: LOWESS curve is visually distinct (contrasting yellow color, thick
+          line), moderate transparency on points, smooth curve
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: `scatter-regression-lowess · seaborn · pyplots.ai`'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows non-linear relationship with varying patterns across x-range.
+          The sine+power combination demonstrates LOWESS ability to adapt, though
+          the relationship could show more distinct local variation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic mathematical example, plausible but not tied to a real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible, 200 points is appropriate for LOWESS
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, and seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's `regplot` with `lowess=True`, which is the canonical
+          way to create LOWESS plots in seaborn
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/altair.yaml b/plots/scatter-regression-polynomial/metadata/altair.yaml
index b4c4e1d337..6065016913 100644
--- a/plots/scatter-regression-polynomial/metadata/altair.yaml
+++ b/plots/scatter-regression-polynomial/metadata/altair.yaml
@@ -27,3 +27,176 @@ review:
     for crop yield
   - Could use Altair built-in transform_regression() instead of manual numpy polyfit
     for more idiomatic implementation
+  image_description: The plot displays a scatter plot with polynomial regression showing
+    the relationship between fertilizer application (kg/ha) on the x-axis and crop
+    yield (tons/ha) on the y-axis. Blue circular markers (#306998) represent 80 data
+    points with 65% opacity, distributed in a clear parabolic pattern. A bold yellow
+    polynomial curve (#FFD43B) fits through the data, demonstrating diminishing returns
+    - yield increases initially then decreases with excessive fertilizer. The R² value
+    (0.805) and polynomial equation (y = -0.75x² + 7.54x + 5.29) are prominently displayed
+    in the upper left corner. The title correctly follows the format 'scatter-regression-polynomial
+    · altair · pyplots.ai'. Axis labels include units (kg/ha and tons/ha). The overall
+    layout is clean with good use of canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (180) with good opacity (0.65) for 80 points;
+          curve is bold and visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe, no red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, minor whitespace at bottom could be reduced
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: ''Fertilizer (kg/ha)'', ''Crop Yield (tons/ha)'''
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid present; while not required, subtle grid would help readability
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned with meaningful variable names
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: R² displayed, polynomial equation shown, curve visually distinct,
+          points have transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes accommodate full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type, annotations are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: ''scatter-regression-polynomial · altair · pyplots.ai'''
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear parabolic pattern with diminishing returns; good noise
+          level; could benefit from more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Fertilizer vs crop yield is a real agricultural scenario matching
+          spec's 'diminishing returns in economics' application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (0-10 kg/ha fertilizer, 0-25 tons/ha yield),
+          though some negative yield values appear unrealistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but also 'plot.html' (minor, but spec doesn't
+          require HTML)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative syntax, layered charts, tooltips, and text
+          marks for annotations; could use transform_regression for built-in regression
+          instead of manual numpy calculation
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/bokeh.yaml b/plots/scatter-regression-polynomial/metadata/bokeh.yaml
index 5c52b9cb46..748139c197 100644
--- a/plots/scatter-regression-polynomial/metadata/bokeh.yaml
+++ b/plots/scatter-regression-polynomial/metadata/bokeh.yaml
@@ -26,3 +26,178 @@ review:
   - Missing HoverTool for interactive point inspection which is a key Bokeh strength
   - Annotation text size (22pt) is adequate but could be larger (24-26pt) for better
     visibility at full resolution
+  image_description: The plot displays a scatter plot with 100 blue data points (#306998)
+    representing manufacturing efficiency gains (%) against investment (thousands
+    $). A smooth yellow polynomial regression curve (#FFD43B) fits through the data,
+    clearly showing the diminishing returns pattern - efficiency gains increase rapidly
+    at first then taper off. The R² value (0.9765) and polynomial equation (y = -0.0047x²
+    + 1.17x + 20.07) are annotated in the upper-middle area with a white background.
+    The title "scatter-regression-polynomial · bokeh · pyplots.ai" appears at the
+    top left. Axis labels include units ("Investment (thousands $)" and "Efficiency
+    Gain (%)"). A legend in the bottom right shows "Data Points" and "Polynomial Fit
+    (degree 2)". The background is light gray (#fafafa) with subtle dashed grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are readable; annotation text
+          could be slightly larger for optimal visibility
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter points are well-sized (size=18) with good alpha (0.65); regression
+          line is clearly visible (line_width=5)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Investment (thousands $)", "Efficiency
+          Gain (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but legend is positioned in bottom right
+          corner which is far from the main data concentration; legend could be better
+          placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (investment) and Y (efficiency) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: R² displayed prominently, polynomial curve distinct from points,
+          equation annotated, points have transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-polynomial · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows polynomial curve fitting well; could benefit from showing confidence
+          band (optional per spec)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Manufacturing efficiency with diminishing returns is an excellent,
+          realistic economic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Investment 10-100k and efficiency 30-90% are reasonable; the quadratic
+          pattern is well demonstrated
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses ColumnDataSource and Label annotation which are Bokeh-specific,
+          but doesn't leverage Bokeh's interactive features like HoverTool which would
+          show point values on hover
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/highcharts.yaml b/plots/scatter-regression-polynomial/metadata/highcharts.yaml
index e4b876a1a7..5e26602b29 100644
--- a/plots/scatter-regression-polynomial/metadata/highcharts.yaml
+++ b/plots/scatter-regression-polynomial/metadata/highcharts.yaml
@@ -27,3 +27,178 @@ review:
   - Could leverage Highcharts-specific features like tooltip formatters or data labels
     for enhanced interactivity
   - Missing optional confidence band mentioned in spec (though marked as optional)
+  image_description: The plot displays a scatter chart with 80 blue data points (semi-transparent
+    with darker blue outlines) showing a quadratic relationship between Advertising
+    Spend ($k) on the x-axis (1-20) and Revenue ($k) on the y-axis (~10-100). A smooth
+    yellow/gold polynomial regression curve (degree 2) fits through the data, clearly
+    demonstrating diminishing returns. The title "scatter-regression-polynomial ·
+    highcharts · pyplots.ai" appears at the top in bold. The subtitle displays the
+    fitted equation "y = -0.277x² + 9.550x + 5.917 | R² = 0.9563". A legend in the
+    top-right corner shows "Data Points" and "Polynomial Fit (R² = 0.956)". The background
+    is white with subtle gray grid lines. The layout is well-balanced with good use
+    of the canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px, axis labels at 36px, tick labels at 28px - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean spacing throughout
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers visible with good alpha, though slightly dense in middle
+          region
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow curve are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Advertising Spend ($k)", "Revenue
+          ($k)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid subtle at alpha 0.1, legend well-placed but could be slightly
+          more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (independent/dependent variables)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: R² displayed, polynomial equation shown, curve visually distinct,
+          moderate transparency on points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Data Points" and "Polynomial Fit (R²
+          = 0.956)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "scatter-regression-polynomial · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear polynomial trend with noise, demonstrates diminishing
+          returns well; could show more variance at extremes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Advertising spend vs revenue is a perfect real-world diminishing
+          returns scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible ($1k-$20k spend, $10k-$95k revenue), though
+          revenue ratios might be slightly high
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → fit → chart → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports present
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Python API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ScatterSeries and SplineSeries appropriately, Highcharts interactive
+          features via HTML export, but doesn't leverage advanced Highcharts-specific
+          features like zones or annotations
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/letsplot.yaml b/plots/scatter-regression-polynomial/metadata/letsplot.yaml
index 670c47d759..5d87a9d5b5 100644
--- a/plots/scatter-regression-polynomial/metadata/letsplot.yaml
+++ b/plots/scatter-regression-polynomial/metadata/letsplot.yaml
@@ -24,3 +24,179 @@ review:
   - The geom_ribbon confidence band renders with dark border lines at the edges, which
     appears as two additional gray curves surrounding the yellow polynomial line -
     this is visually distracting
+  image_description: The plot displays a scatter plot with polynomial regression showing
+    the relationship between advertising spend (x-axis, in thousands $) and sales
+    revenue (y-axis, in thousands $). Blue scatter points (#306998) are distributed
+    following a curved pattern showing diminishing returns. A bright yellow polynomial
+    regression curve fits through the data. A light blue confidence band surrounds
+    the curve. The R² value (0.936) is prominently displayed in the upper right, along
+    with the polynomial equation (y = -0.046x² + 3.789x + 21.45). The title "scatter-regression-polynomial
+    · letsplot · pyplots.ai" appears at the top. The plot uses a minimal theme with
+    subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large, axis labels clearly readable at 20pt, tick
+          marks at 16pt, R² and equation annotations easily visible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, equation text and R² well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points sized appropriately (size=5) for 80 data points, alpha=0.65
+          shows density well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow curve provide excellent contrast, colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, balanced layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Advertising Spend
+          (thousands $)" and "Sales Revenue (thousands $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: The confidence band border (dark gray line at edges of ribbon) is
+          visually distracting; appears as additional curves rather than subtle shading
+          boundary. No legend needed for this plot type
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=independent variable, Y=dependent variable correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: R² displayed, polynomial curve distinct
+          from points, confidence band included, polynomial equation shown, appropriate
+          alpha on points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for this plot (no legend needed/present)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "scatter-regression-polynomial · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear diminishing returns pattern - initial steep growth flattening
+          at higher x values, scatter demonstrates variance around the curve
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economics example of advertising spend vs sales revenue is a real,
+          comprehensible scenario that naturally exhibits diminishing returns
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values are realistic: advertising $5-50K, sales $40-105K, appropriate
+          business context'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses lets-plot''s ggplot2-style grammar effectively: geom_ribbon
+          for confidence band, geom_point, geom_line, geom_text for annotations, theme_minimal
+          with customization, proper ggsize and ggsave with scale=3'
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/matplotlib.yaml b/plots/scatter-regression-polynomial/metadata/matplotlib.yaml
index 244d9b689e..baa749fd71 100644
--- a/plots/scatter-regression-polynomial/metadata/matplotlib.yaml
+++ b/plots/scatter-regression-polynomial/metadata/matplotlib.yaml
@@ -25,3 +25,174 @@ review:
   - Uses only basic matplotlib/numpy; scipy.stats or sklearn could provide more sophisticated
     polynomial fitting
   - Generic axis units (units) rather than more specific economic terms
+  image_description: 'The plot displays a scatter plot with polynomial regression
+    on a 16:9 canvas. The scatter points are blue (#306998) with white edges and moderate
+    transparency (alpha ~0.6), showing 80 data points. A prominent yellow/gold (#FFD43B)
+    quadratic curve fits through the data, surrounded by a light yellow semi-transparent
+    95% confidence band. The title reads "scatter-regression-polynomial · matplotlib
+    · pyplots.ai" at the top. The x-axis is labeled "Investment (units)" and y-axis
+    "Return (units)". A white annotation box in the upper left shows the polynomial
+    equation (y = -0.47x² + 5.69x + 5.20) and R² = 0.859. A legend in the lower right
+    shows three items: Data points, 95% confidence band, and Polynomial fit (degree
+    2). Grid lines are subtle with dashed style. The parabolic curve peaks around
+    x=6, demonstrating diminishing returns.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers s=150 appropriate for 80 points, alpha=0.6 good; slightly
+          large but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe, no red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Investment (units)", "Return (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid subtle (alpha=0.3), legend placed well but could be positioned
+          away from data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (Investment vs Return)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: R² displayed, polynomial curve distinct, confidence
+          band, equation annotated'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data properly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels accurate for all three elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-polynomial · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows parabolic curve clearly with peak demonstrating diminishing
+          returns; could show more variation at extremes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economics diminishing returns example is real and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values reasonable (0-10 investment, 5-25 return); units generic but
+          acceptable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses standard matplotlib; could leverage annotate better or use scipy
+          for more sophisticated fitting
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/plotly.yaml b/plots/scatter-regression-polynomial/metadata/plotly.yaml
index db02b9fb63..319cb78e9b 100644
--- a/plots/scatter-regression-polynomial/metadata/plotly.yaml
+++ b/plots/scatter-regression-polynomial/metadata/plotly.yaml
@@ -27,3 +27,180 @@ review:
     parameter)
   - No confidence band around the regression curve (optional per spec but would enhance
     visualization)
+  image_description: The plot displays a scatter plot with 80 data points in blue
+    (#306998) with a polynomial regression curve (quadratic, degree 2) in yellow/gold
+    (#FFD43B). The title "Ad Spend vs Revenue · scatter-regression-polynomial · plotly
+    · pyplots.ai" is centered at the top. The x-axis is labeled "Ad Spend ($K)" ranging
+    from 0-50, and the y-axis is labeled "Revenue ($K)" ranging from approximately
+    10-100. The legend in the top-left corner shows "Data Points" and "Polynomial
+    Fit (degree 2)". An annotation box in the bottom-right corner displays "R² = 0.8817"
+    and the polynomial equation "y = -0.075x² + 4.75x + 10.8". The background is white
+    (plotly_white template) with subtle gray gridlines. The scatter points show a
+    clear diminishing returns pattern - revenue increases with ad spend but eventually
+    plateaus and decreases.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at size 28, axis labels at 22, ticks at 18 - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend and annotation are well-placed
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers at size 14 with 0.65 opacity work well for 80 points, though
+          slightly smaller than optimal for the data density (should be closer to
+          100-200 per guidelines)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins with proper spacing
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Ad Spend ($K)" and "Revenue ($K)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is 0.3 which is acceptable, but legend at top-left could
+          overlap with data in denser scenarios
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (independent) and Y (dependent) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: R² value displayed, polynomial equation
+          shown, curve visually distinct (yellow vs blue), appropriate transparency
+          (0.65)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points, nothing clipped
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels both traces
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{context} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows diminishing returns pattern clearly, demonstrates curve inflection
+          point. Minor: could benefit from confidence band as mentioned as optional
+          in spec'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Ad spend vs revenue is a perfect real-world example of diminishing
+          returns economics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic ($0-50K spend, $10-100K revenue), though the
+          rapid decline after ~32K might be slightly aggressive
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → fitting → figure → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (4800x2700 via 1600x900 scale 3) and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotly_white template and annotations well, produces interactive
+          HTML output, but doesn't leverage plotly's native trendline capabilities
+          (px.scatter with trendline parameter) or hover customization
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/plotnine.yaml b/plots/scatter-regression-polynomial/metadata/plotnine.yaml
index d62cb3d1a5..39e29d0304 100644
--- a/plots/scatter-regression-polynomial/metadata/plotnine.yaml
+++ b/plots/scatter-regression-polynomial/metadata/plotnine.yaml
@@ -25,3 +25,180 @@ review:
     more prominent at 16-18pt for better visibility
   - The confidence band uses the same color as the regression line which reduces visual
     distinction
+  image_description: The plot displays a scatter plot with a polynomial regression
+    curve showing the relationship between Temperature (°C) on the x-axis (ranging
+    from 0 to 40) and Energy Consumption (kWh) on the y-axis (ranging from approximately
+    50 to 110). The scatter points are blue (#306998) with moderate transparency (alpha
+    ~0.65). A golden-yellow (#FFD43B) polynomial curve fits through the data showing
+    a clear U-shaped (quadratic) relationship, with a semi-transparent confidence
+    band around it. The equation "y = 0.154x² - 6.204x + 111.68" and "R² = 0.980"
+    are annotated in the upper right corner. The title follows the correct format
+    "scatter-regression-polynomial · plotnine · pyplots.ai". The background is minimal
+    with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, annotation
+          at 14pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, annotation positioned well in empty
+          space
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=4) with good alpha (0.65) for 100 points,
+          curve is prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue points and yellow curve provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, good utilization of
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Temperature (°C)"
+          and "Energy Consumption (kWh)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), but the confidence band color could have
+          better visual separation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression curve
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (Temperature vs Energy)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: R² displayed, polynomial equation shown, confidence band included,
+          distinct curve color
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, annotation is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-polynomial · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear U-shaped quadratic relationship, demonstrates the polynomial
+          fit well; could show more variation in one region
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: temperature vs energy consumption
+          with U-shaped curve (heating at cold temps, cooling at hot temps, optimal
+          at ~20°C)'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature 0-40°C is realistic, energy consumption 50-110 kWh is
+          plausible; values are sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" ✓ but uses verbose=False which is fine
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_smooth with formula syntax which is plotnine-specific,
+          uses grammar of graphics approach; could have used more advanced features
+          like scale_* customization
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/pygal.yaml b/plots/scatter-regression-polynomial/metadata/pygal.yaml
index ab1a56348f..e0778ec31f 100644
--- a/plots/scatter-regression-polynomial/metadata/pygal.yaml
+++ b/plots/scatter-regression-polynomial/metadata/pygal.yaml
@@ -23,3 +23,170 @@ review:
   - Data points lack transparency (alpha=0.7 mentioned in legend but not visually
     apparent)
   - Legend position at bottom creates slight visual imbalance
+  image_description: The plot displays a scatter plot with polynomial regression showing
+    plant growth (cm) on the Y-axis versus sunlight exposure (hours) on the X-axis.
+    Blue circular data points are scattered across the chart, with a coral/salmon-colored
+    polynomial regression curve fitting through them. The curve shows a clear parabolic
+    pattern - growth increases from 2 hours of sunlight, peaks around 9-10 hours,
+    then decreases (diminishing returns). The title "scatter-regression-polynomial
+    · pygal · pyplots.ai" appears at the top. Axis labels are clear with units. The
+    legend at the bottom shows "Data Points (α=0.7)" and "Polynomial Fit (R²=0.886)".
+    Grid lines are subtle gray. White background with good contrast.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks all clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data points clearly visible, regression curve distinguishable, slight
+          deduction for dots being somewhat small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/coral color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, legend at bottom creates slight imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sunlight Exposure (hours)", "Plant Growth
+          (cm)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid visible but legend placement at bottom with R² info is good
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter with polynomial regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (sunlight → plant growth)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has polynomial curve and R² value, missing polynomial equation annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels clearly indicate data points and polynomial fit with
+          R²
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "scatter-regression-polynomial · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear curved pattern with peak and decline (diminishing returns)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plant growth vs sunlight is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values realistic: 2-14 hours sunlight, 0-135 cm growth'
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart, custom Style, legend, but could leverage more
+          pygal-specific features like tooltips
+  verdict: APPROVED
diff --git a/plots/scatter-regression-polynomial/metadata/seaborn.yaml b/plots/scatter-regression-polynomial/metadata/seaborn.yaml
index fb67aeea97..186cd5acd9 100644
--- a/plots/scatter-regression-polynomial/metadata/seaborn.yaml
+++ b/plots/scatter-regression-polynomial/metadata/seaborn.yaml
@@ -24,3 +24,175 @@ review:
   - Legend in lower right slightly overlaps the data region where points exist
   - Uses numpy polyfit instead of seaborn regplot(order=2) which would be more library-idiomatic
   - Generic units for axis labels rather than specific units (e.g., dollars, percentage)
+  image_description: The plot displays a scatter plot with 80 blue-gray data points
+    (#306998 color) with white edges against a whitegrid background. A bright yellow
+    (#FFD43B) quadratic polynomial curve smoothly fits through the data, showing a
+    classic diminishing returns pattern that rises from ~5 at x=0 to peak around ~35
+    at x≈6, then declining towards ~25 at x=10. A semi-transparent yellow confidence
+    band (95% CI) surrounds the curve. The polynomial equation (y = -0.75x² + 9.54x
+    + 5.29) and R² = 0.895 are prominently displayed in a white box in the upper left
+    corner. The title correctly follows the format 'scatter-regression-polynomial
+    · seaborn · pyplots.ai'. Axis labels show 'Investment (units)' and 'Profit (units)'.
+    A well-placed legend in the lower right identifies all three elements.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, equation annotation 18pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, equation box doesn't obscure data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers s=150 appropriate for 80 points, alpha=0.65 good density
+          handling
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: ''Investment (units)'', ''Profit (units)'''
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid subtle at alpha=0.3, but legend overlaps/covers lower data points
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct scatter plot with polynomial regression
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (Investment vs Profit)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has R² display, polynomial equation, confidence band, appropriate
+          alpha
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify Data Points, Polynomial Fit, Confidence
+          Band
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format '{spec-id} · {library} · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows curved polynomial relationship well, demonstrates diminishing
+          returns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Investment vs Profit with diminishing returns is a real, comprehensible
+          economic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible, though 'units' is generic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' ✓ (actually correct, miscounted)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot and set_style, but polynomial fitting is done
+          with numpy. Could have used seaborn's regplot with order parameter for a
+          more library-idiomatic approach.
+  verdict: APPROVED
diff --git a/plots/shap-summary/metadata/altair.yaml b/plots/shap-summary/metadata/altair.yaml
index 8d72bde59c..38fbd2cd6a 100644
--- a/plots/shap-summary/metadata/altair.yaml
+++ b/plots/shap-summary/metadata/altair.yaml
@@ -26,3 +26,175 @@ review:
     feature rows
   - Interactive features not utilized (no tooltips or zoom/pan) which is an Altair
     strength
+  image_description: 'The plot displays a SHAP summary visualization with 10 features
+    arranged vertically, ordered by feature importance from top (Account Age, Transaction
+    Count) to bottom (Support Tickets). Each feature row contains scattered points
+    representing individual samples, positioned horizontally by their SHAP value (ranging
+    from approximately -0.8 to 0.8). Points are colored using a blue-to-orange diverging
+    color scheme: blue indicates low feature values (0) and orange indicates high
+    feature values (1), with a gradient legend on the right. A vertical dashed line
+    at x=0 separates positive and negative SHAP impacts. The title "shap-summary ·
+    altair · pyplots.ai" appears at the top center. Points are jittered vertically
+    within each feature row to reduce overlap.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text clearly readable at full size, appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Jittering effectively prevents point overlap within rows
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points well-sized for 300 samples, good alpha, though some clustering
+          at zero for less important features
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-orange diverging scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though slight empty space on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with context "SHAP Value (Impact on
+          Model Output)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle but legend could be better positioned; the legend
+          is somewhat isolated on the right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct SHAP summary plot with beeswarm-style visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: SHAP values on X, features on Y, color mapped to feature values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted by importance, diverging colors,
+          zero line, jittering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows feature value mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "shap-summary · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varying importances and feature effects well, though the pattern
+          is somewhat uniform across features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit/financial model context with plausible features (Account Age,
+          Credit Score, Debt Ratio)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: SHAP values in reasonable range, though all features normalized to
+          0-1 is slightly artificial
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of transform_calculate for jittering, layered chart composition,
+          but could leverage more Altair-specific features like interactive selection
+  verdict: APPROVED
diff --git a/plots/shap-summary/metadata/highcharts.yaml b/plots/shap-summary/metadata/highcharts.yaml
index b8caeeca57..b3846cad67 100644
--- a/plots/shap-summary/metadata/highcharts.yaml
+++ b/plots/shap-summary/metadata/highcharts.yaml
@@ -28,3 +28,175 @@ review:
   - All features show similar linear correlation patterns - more diverse SHAP distributions
     (e.g., some features with non-linear effects, clustered values) would better demonstrate
     the plot type capabilities
+  image_description: The plot displays a SHAP summary visualization with 10 features
+    (House Size, Bedrooms, Location Score, Bathrooms, Age, Garage Spaces, Lot Size,
+    School Rating, Crime Rate, Distance to City) ordered vertically by importance.
+    Each feature row contains scattered dots representing individual samples, positioned
+    horizontally by their SHAP values. A blue-to-red color gradient indicates feature
+    values (low=blue, high=red), with a diverging color scale clearly visible. A vertical
+    black line at x=0 separates positive and negative SHAP impacts. The title reads
+    "shap-summary · highcharts · pyplots.ai" with subtitle "Feature Importance and
+    Impact on Model Predictions". The legend on the right shows "Feature Value" with
+    three representative bins (0.0-0.1, 0.4-0.5, 0.9-1.0). The x-axis shows SHAP values
+    ranging approximately from -1 to 10. Points are appropriately jittered vertically
+    to reduce overlap.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and feature names all clearly readable at full
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, jittering prevents data point overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized, good jittering, some dense areas could use alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue-red diverging scale is reasonable for SHAP plots (convention),
+          though not ideal for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, margins appropriate
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "SHAP Value (Impact on Prediction)" but no units for y-axis
+          feature names (acceptable for categorical)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid lines, legend well-placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct SHAP summary scatter plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: SHAP values on X-axis, features on Y-axis, color by feature value
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted by importance, diverging color
+          scale, vertical line at 0, jittering'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows feature value bins
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "shap-summary · highcharts · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variety of feature importances, but correlation pattern is
+          quite uniform across features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: House price prediction is an excellent, neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: SHAP values and feature importance ordering are plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Highcharts scatter series and color binning, but could
+          leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/shap-summary/metadata/letsplot.yaml b/plots/shap-summary/metadata/letsplot.yaml
index 998bf71550..c8f1c3a40c 100644
--- a/plots/shap-summary/metadata/letsplot.yaml
+++ b/plots/shap-summary/metadata/letsplot.yaml
@@ -24,3 +24,180 @@ review:
     in the model context
   - Vertical grid lines are slightly prominent and could be more subtle
   - Some extreme outlier SHAP values (±3) that may distract from the main distribution
+  image_description: The plot displays a SHAP summary visualization for a loan/credit
+    approval model with 10 features. Features are arranged vertically in order of
+    importance with Credit Score at the top and Inquiry Count at the bottom. Each
+    row shows scattered points representing individual samples, with horizontal position
+    indicating SHAP value (impact on model output). Points are colored using a blue-to-red
+    gradient representing normalized feature values (0=blue, 1=red). A dashed vertical
+    line at x=0 separates positive and negative impacts. The title "shap-summary ·
+    letsplot · pyplots.ai" appears at the top. The x-axis is labeled "SHAP Value (impact
+    on model output)" and extends from approximately -3 to 3. The legend on the right
+    shows "Feature Value" with a continuous color scale.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick text are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Feature names are well-spaced with no overlapping text
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are visible with good alpha (0.7), though some overlap in
+          dense areas like Inquiry Count row
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-red gradient is colorblind-friendly (diverging scale with
+          distinct endpoints)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, legend positioned well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis is descriptive but no units; Y-axis is intentionally blank
+          (feature names as labels)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (disabled on Y-axis as appropriate), legend clear
+          but minor vertical gridlines could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct SHAP summary beeswarm/strip plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: SHAP values on x-axis, features on y-axis, feature values encoded
+          as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has vertical line at x=0, diverging color scale, jittering for visibility,
+          features sorted by importance, top 10 shown
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range of SHAP values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Feature Value" with 0-1 scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive and negative effects, different feature importances,
+          some features show clear color-value relationships (Credit Score, Debt Ratio)
+          but color patterns less visible for middle-importance features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit/loan approval model is an excellent real-world ML interpretability
+          use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: SHAP values are in reasonable range; some features like Income have
+          extreme outliers (±3) that may be slightly unrealistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html as expected
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar appropriately with scale_color_gradient, theme
+          customization, ggsize, but doesn't use any lets-plot specific advanced features
+          like tooltips
+  verdict: APPROVED
diff --git a/plots/shap-summary/metadata/matplotlib.yaml b/plots/shap-summary/metadata/matplotlib.yaml
index 95701dd287..c5f9b4c5e5 100644
--- a/plots/shap-summary/metadata/matplotlib.yaml
+++ b/plots/shap-summary/metadata/matplotlib.yaml
@@ -27,3 +27,182 @@ review:
     would improve visibility
   - Could show more diverse SHAP patterns (e.g., features where both high and low
     values push predictions the same direction)
+  image_description: 'The plot displays a SHAP summary visualization for a house price
+    prediction model. It shows 10 features ordered by importance from top (Square
+    Footage) to bottom (Year Renovated). Each row contains scattered points representing
+    300 samples, positioned horizontally by their SHAP values. Points are colored
+    using a blue-to-red gradient: blue indicates low feature values, gray for mid-range,
+    and red for high feature values. The x-axis shows "SHAP Value (Impact on Model
+    Output)" ranging from approximately -3 to +3.5. A vertical black line at x=0 separates
+    positive and negative impacts. The title follows the required format: "shap-summary
+    · matplotlib · pyplots.ai". A colorbar on the right shows the "Feature Value"
+    scale from Low (blue) to High (red). The plot uses vertical jittering to reduce
+    point overlap within each feature row.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, x-label at 20pt, y-tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Markers sized at s=80 which is reasonable for 300 points with alpha=0.7.
+          Slightly smaller than ideal for this density but still clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-gray-red colormap is colorblind-friendly, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good proportions with colorbar positioned
+          correctly
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis descriptive but lacks units; Y-axis shows feature names which
+          is appropriate
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle vertical grid at alpha=0.3, colorbar well-placed; however
+          no legend for what the colorbar represents at a glance (only "Feature Value"
+          text)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct SHAP summary plot with horizontal scatter rows
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: SHAP values on X, features on Y, color mapped to feature values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted by importance, vertical line at
+          0, jittering, blue-red colormap, top 10 features'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full SHAP value range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly shows Low/High feature value mapping
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "shap-summary · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive and negative SHAP values, varying importance levels,
+          color correlation with SHAP direction. Could show more non-linear patterns
+          (e.g., where high and low both push same direction)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: House price prediction is a perfect, neutral ML interpretability
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: SHAP values in reasonable range; feature values normalized 0-1 which
+          is standard practice
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, LinearSegmentedColormap)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using plt.colorbar() instead of fig.colorbar() is less explicit,
+          but not deprecated
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300 and bbox_inches='tight'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses LinearSegmentedColormap for custom colormap creation, scatter
+          with c parameter for coloring. Could leverage more matplotlib-specific features
+          like Normalize or custom tick formatters.
+  verdict: APPROVED
diff --git a/plots/shap-summary/metadata/plotly.yaml b/plots/shap-summary/metadata/plotly.yaml
index 0457a82263..d2332c1b8a 100644
--- a/plots/shap-summary/metadata/plotly.yaml
+++ b/plots/shap-summary/metadata/plotly.yaml
@@ -27,3 +27,178 @@ review:
     the zero line
   - Feature importance distribution is relatively uniform - could show more dramatic
     differences between important and less important features
+  image_description: The plot displays a SHAP summary visualization with 15 features
+    on the y-axis, sorted by importance from top (mean radius) to bottom (smoothness
+    error). Each row shows scattered dots representing individual samples, positioned
+    horizontally by their SHAP values ranging from approximately -0.3 to +0.3. The
+    dots are colored using a diverging blue-to-red color scheme (RdBu_r) where blue
+    indicates low feature values and red indicates high feature values. A clear vertical
+    black line at x=0 separates positive and negative SHAP impacts. The title "shap-summary
+    · plotly · pyplots.ai" is centered at the top. A colorbar on the right shows the
+    feature value scale from Low to High. The background is white with subtle gray
+    gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 16-18pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names well-spaced on y-axis
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (size=8) with 0.7 alpha for 200 samples;
+          jittering prevents complete overlap
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: RdBu_r diverging colorscale is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins (200px left for feature names), plot uses canvas well
+          but some empty space at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "SHAP Value (Impact on Model Output)" and "Feature"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.2), colorbar well-placed, but no traditional
+          legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct SHAP summary beeswarm-style plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: SHAP values on x-axis, features on y-axis, color by feature value
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vertical line at x=0, diverging color scale, sorted by importance,
+          jittering applied
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled with Low/Medium/High
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"shap-summary · plotly · pyplots.ai" matches required format'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows positive and negative SHAP values, varying importances, but
+          correlation between feature value and SHAP is somewhat uniform across features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses breast cancer diagnostic feature names (radius, texture, perimeter,
+          etc.) - realistic ML interpretability scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: SHAP values in reasonable -0.3 to +0.3 range, feature values simulated
+          realistically
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Using dict syntax instead of go.Layout() is fine, but hovertemplate
+          uses normalized color value instead of actual feature value
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with custom colorscale, add_vline, interactive hover
+          templates, and HTML export. Could have used animation or subplot features
+          more distinctively.
+  verdict: APPROVED
diff --git a/plots/shap-summary/metadata/seaborn.yaml b/plots/shap-summary/metadata/seaborn.yaml
index a6e84e9f8b..7f614bdad0 100644
--- a/plots/shap-summary/metadata/seaborn.yaml
+++ b/plots/shap-summary/metadata/seaborn.yaml
@@ -21,3 +21,173 @@ review:
   weaknesses:
   - Consider using slightly larger marker size (s=10-12) for better visibility of
     individual points
+  image_description: 'The plot displays a SHAP summary visualization with 12 breast
+    cancer features sorted vertically by importance (most important at top). Each
+    row represents a feature with dots representing individual samples. The horizontal
+    axis shows SHAP values ranging from approximately -0.75 to 1.0, with a clear blue
+    vertical reference line at x=0. Points are colored using a coolwarm diverging
+    colormap: blue indicates low feature values (0.0) and red indicates high feature
+    values (1.0). The colorbar on the right clearly labels "Feature Value (Low to
+    High)". Features like "mean area" and "mean concave points" show wide SHAP value
+    distributions, while "mean compactness" shows a narrow distribution. The title
+    reads "shap-summary · seaborn · pyplots.ai" at the top. The layout is clean with
+    subtle gridlines on the x-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, feature names clearly separated, jitter prevents
+          point overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Points visible with good alpha, but marker size=8 could be slightly
+          larger for optimal viewing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Coolwarm colormap is colorblind-safe diverging palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of canvas, colorbar properly integrated
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"SHAP Value (Impact on Model Output)" and "Feature" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, but legend=False means no legend (colorbar
+          serves this purpose adequately)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct SHAP summary plot with stripplot approach
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: SHAP values on X, features on Y, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has diverging colormap, vertical zero line, jitter, sorted by importance,
+          top 10 features
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of SHAP values displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents feature value scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "shap-summary · seaborn · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varied SHAP distributions but some features show limited spread
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Breast cancer dataset is a real, neutral scientific domain
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: SHAP values are realistic for probability outputs (-0.75 to 1.0)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn, sklearn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API used correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of sns.stripplot with hue mapping and palette, sns.despine
+          for styling, but could leverage more seaborn features
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/altair.yaml b/plots/silhouette-basic/metadata/altair.yaml
index 5058552a7b..168d58f497 100644
--- a/plots/silhouette-basic/metadata/altair.yaml
+++ b/plots/silhouette-basic/metadata/altair.yaml
@@ -27,3 +27,178 @@ review:
   - No interactive selection or highlighting when hovering over clusters (missed Altair
     strength)
   - Bars extending from 0 to silhouette value using x2=alt.value(0) could be clearer
+  image_description: 'The plot displays a silhouette plot for K-means clustering of
+    the Iris dataset with 3 clusters. The visualization uses horizontal bars where
+    each bar represents a sample''s silhouette coefficient. The three clusters are
+    shown in distinct colors: blue (Cluster 0, avg: 0.42), yellow (Cluster 1, avg:
+    0.80), and red (Cluster 2, avg: 0.45). The clusters are stacked vertically with
+    clear visual separation. A dashed red vertical line marks the overall average
+    silhouette score (0.553) with an annotation "Avg: 0.553". The x-axis shows "Silhouette
+    Coefficient" ranging from approximately -0.20 to 1.00. The title reads "silhouette-basic
+    · altair · pyplots.ai" at the top. The legend is positioned in the upper right
+    corner showing cluster labels with their average scores.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clear
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are clearly visible and well-sized, slight deduction for very
+          thin bars at edges
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, red palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label "Silhouette Coefficient" but no Y-axis
+          label (hidden by design, acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (appropriate for this chart type), legend well-placed but
+          slightly small symbols
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct silhouette plot with horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette values correctly mapped to x-axis, samples grouped by
+          cluster
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has horizontal bars sorted by cluster, distinct cluster colors,
+          vertical average line with annotation. Minor: cluster section annotations
+          are in legend rather than inline'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis properly shows full range of silhouette values (-0.2 to 1.0)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies clusters with their average scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "silhouette-basic · altair · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows positive and negative silhouette values, different cluster
+          qualities (Cluster 1 excellent at 0.80, others moderate). Minor: could show
+          more extreme negative values'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses actual Iris dataset, a classic clustering example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette values correctly bounded between -1 and 1, realistic distribution
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for KMeans
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (altair, numpy, pandas, sklearn modules)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also saves plot.html (minor extra
+          file)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding, mark_rect for bars, tooltips,
+          layering. Could leverage more interactive features or selections
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/bokeh.yaml b/plots/silhouette-basic/metadata/bokeh.yaml
index ad1935b4ba..8ac97fdd59 100644
--- a/plots/silhouette-basic/metadata/bokeh.yaml
+++ b/plots/silhouette-basic/metadata/bokeh.yaml
@@ -27,3 +27,174 @@ review:
   - Could benefit from hover tooltips showing individual sample silhouette values
     for interactivity
   - No explicit legend element though cluster labels serve the purpose adequately
+  image_description: |-
+    The plot displays a silhouette plot with three distinct clusters visualized as horizontal bars. From bottom to top:
+    - **Cluster 0** (blue, #306998): 50 samples with the highest silhouette scores, bars extending mostly from 0.4 to 0.95
+    - **Cluster 1** (yellow/gold, #FFD43B): 55 samples with medium-high scores, bars ranging roughly 0.25 to 0.85
+    - **Cluster 2** (green, #2ECC71): 45 samples with more variable scores, including some approaching negative values, ranging from ~-0.1 to 0.75
+
+    A red dashed vertical line marks the average silhouette score (0.609). Each cluster is labeled on the left with "Cluster X" in matching colors, and on the right with sample count and average (e.g., "n=50, avg=0.82"). The title "silhouette-basic · bokeh · pyplots.ai" appears at top-left. X-axis labeled "Silhouette Coefficient", Y-axis labeled "Cluster (samples sorted by silhouette score)". Light gray background with subtle vertical grid lines. The bars are sorted within each cluster from lowest to highest silhouette value.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24pt - all perfectly
+          readable at 4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; cluster labels positioned cleanly on
+          left, stats on right
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized with appropriate alpha (0.85), slight gap between
+          bars for clarity
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green are colorblind-safe; distinct even for deuteranopia
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though right-side stats labels (n=55, avg=0.62
+          for Cluster 1) appear slightly small/faint
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis "Silhouette Coefficient" is descriptive; Y-axis explains sorting
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed for this plot type, but vertical grid at alpha 0.3
+          is appropriate. However, the "Cluster" labels serve as implicit legend.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct silhouette plot with horizontal bars grouped by cluster
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette values on X-axis, samples stacked vertically within clusters
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has: horizontal bars sorted within clusters, distinct colors per
+          cluster, vertical line at average, cluster annotations with avg scores'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range (-0.3 to 1.25) accommodates all values including negatives
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Cluster labels and stats are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "silhouette-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows well-separated cluster (0), good cluster (1), and cluster with
+          borderline/negative samples (2) - demonstrates most aspects, though negative
+          values are minimal
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer segmentation by purchase behavior is a realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 150 samples with 3 clusters is reasonable; silhouette values in valid
+          range (-1 to 1)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh.io, bokeh.models, bokeh.plotting)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png AND plot.html (both appropriate for Bokeh)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Span for reference lines, Label for annotations
+          - good Bokeh patterns but could leverage hover tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/highcharts.yaml b/plots/silhouette-basic/metadata/highcharts.yaml
index 812b125796..6d719378ec 100644
--- a/plots/silhouette-basic/metadata/highcharts.yaml
+++ b/plots/silhouette-basic/metadata/highcharts.yaml
@@ -25,3 +25,172 @@ review:
   - X-axis (silhouette coefficient axis) label text not visible in the rendered image
   - Y-axis label font could be slightly larger for optimal readability at full resolution
   - Legend text could be larger for better visibility
+  image_description: |-
+    The plot displays a silhouette visualization for a 3-cluster K-means solution on the Iris dataset. Three distinct color-coded clusters are shown with horizontal bars representing individual sample silhouette scores:
+    - **Cluster 0 (blue)**: 50 samples at top with Avg: 0.758 - consistently high scores indicating well-separated cluster
+    - **Cluster 1 (yellow)**: 50 samples in middle with Avg: 0.555 - moderate scores showing decent separation
+    - **Cluster 2 (purple)**: 50 samples at bottom with Avg: 0.361 - lower scores with visible negative values indicating some overlap/misclassification
+
+    A vertical dashed red line marks the average silhouette score (0.558) with label. Title follows the correct format "silhouette-basic · highcharts · pyplots.ai". Subtitle shows "Iris Dataset Clustering (K=3) | Average Silhouette Score: 0.558". Y-axis labeled "Samples (sorted by silhouette score within cluster)". Light background bands distinguish cluster regions. Legend positioned on right side showing sample counts.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, cluster labels, and legend all readable at full
+          size; y-axis label slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars clearly visible and well-sized; some bars quite thin at the
+          dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/purple palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis descriptive but missing units for silhouette coefficient axis
+          (x-axis)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle, legend well-placed but could be larger
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct horizontal bar chart for silhouette visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on y-axis, silhouette scores on x-axis, correctly sorted
+          within clusters
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal bars, cluster grouping, vertical
+          average line, cluster annotations'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows full range including negative values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows cluster names with sample counts
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but x-axis missing label text for "Silhouette Coefficient"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows good variation: well-separated cluster (0), moderate (1),
+          and overlapping cluster (2) with negative values'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Iris dataset is a realistic and commonly used clustering example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette values correctly bounded between -1 and 1 with realistic
+          distributions
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used but could be more concise
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotBands for cluster regions, plotLines for average, tooltips
+          configured, but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/letsplot.yaml b/plots/silhouette-basic/metadata/letsplot.yaml
index e9da30b741..5ce2fdee50 100644
--- a/plots/silhouette-basic/metadata/letsplot.yaml
+++ b/plots/silhouette-basic/metadata/letsplot.yaml
@@ -30,3 +30,174 @@ review:
   - The Iris dataset with 3 clusters produces all positive silhouette values; showing
     some negative values would better demonstrate the plot ability to highlight potential
     misclassifications
+  image_description: 'The plot displays a silhouette coefficient visualization for
+    3 clusters from the Iris dataset. The layout shows horizontal bars representing
+    each sample''s silhouette score, grouped by cluster. Cluster 0 (blue, Python Blue
+    #306998) is at the bottom with avg: 0.42, Cluster 1 (yellow/gold, Python Yellow
+    #FFD43B) in the middle with avg: 0.80, and Cluster 2 (red, #DC2626) at the top
+    with avg: 0.45. A vertical dashed black line indicates the overall average silhouette
+    score (0.55) with "Avg: 0.55" label. Each cluster section is annotated with its
+    name and average score on the left side. The x-axis shows "Silhouette Coefficient"
+    ranging from -0.3 to 1.0, and the y-axis is labeled "Sample Index (sorted within
+    cluster)" with tick marks hidden. A legend on the right shows cluster colors.
+    The title reads "silhouette-basic · letsplot · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, annotations all clearly readable with appropriate
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; cluster labels, average annotation,
+          and legend are well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Horizontal bars are well-sized (size=1.5) and clearly visible; sorted
+          ordering within clusters creates smooth silhouette shape
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, red palette is colorblind-friendly with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good overall layout but some wasted space on the left side of the
+          plot (x-axis extends to -0.3 but data only goes to ~0)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Silhouette Coefficient" and "Sample Index (sorted
+          within cluster)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Y-axis grid is appropriately hidden, but legend is somewhat redundant
+          since cluster labels are already annotated in the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct silhouette plot with horizontal bars for each sample
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette coefficients correctly mapped to x-axis, samples to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: horizontal bars, cluster grouping with
+          distinct colors, vertical line for average, cluster annotations with average
+          scores'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full silhouette range (-0.3 to 1.0), all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies clusters
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "silhouette-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variation across clusters (Cluster 1 has high scores ~0.80,
+          Cluster 0 and 2 have lower averages). Missing: no negative silhouette values
+          shown which would demonstrate potential misclassification'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses real Iris dataset with K-means clustering - classic, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Silhouette values are realistic but extending x-axis to -0.3 when
+          no negative values exist creates wasted space
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and KMeans random_state=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: saves with path="." which works but is unconventional'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/matplotlib.yaml b/plots/silhouette-basic/metadata/matplotlib.yaml
index d12e6a94e6..8d1512eb99 100644
--- a/plots/silhouette-basic/metadata/matplotlib.yaml
+++ b/plots/silhouette-basic/metadata/matplotlib.yaml
@@ -23,3 +23,172 @@ review:
   - Y-axis label could be simplified since individual sample indices are not meaningful
   - All silhouette values are positive; showing some negative values would better
     demonstrate full range
+  image_description: 'The silhouette plot displays three clusters from the iris dataset
+    as horizontal filled areas. Cluster 0 (Python blue, #306998) is at the bottom
+    with an average score of 0.42, showing samples with silhouette values ranging
+    from near 0 to about 0.7. Cluster 1 (yellow, #FFD43B) is in the middle with the
+    highest average of 0.80, displaying consistently high silhouette values. Cluster
+    2 (green, #2ca02c) is at the top with an average of 0.45. A red dashed vertical
+    line marks the overall average silhouette score of 0.55. The x-axis shows "Silhouette
+    Coefficient" ranging from -0.2 to 1.0, and the y-axis is labeled "Sample Index
+    (by Cluster)" with no tick marks. Each cluster is annotated with its name and
+    average score. The title reads "silhouette-basic · matplotlib · pyplots.ai". A
+    legend in the lower right shows the average score line.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text, cluster annotations well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: horizontal bars clearly visible with appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/yellow/green palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: descriptive labels but no units (silhouette coefficient is unitless,
+          acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle x-axis grid (alpha 0.3), legend well-placed in lower right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct silhouette plot with horizontal bars per sample
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: samples sorted within clusters, grouped by cluster assignment
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: horizontal bars ✓, distinct colors ✓, average line ✓, cluster annotations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: x-axis shows -0.2 to 1.0, covering full silhouette range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend correctly shows average score line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'uses correct format: silhouette-basic · matplotlib · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows variation in cluster quality (0.42, 0.80, 0.45), sorted samples,
+          but no negative silhouette values shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: uses iris dataset as suggested in spec, real clustering scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: silhouette values are realistic (-1 to 1 range), but all values are
+          positive
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: imports → data → plot → save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) and random_state=42 for KMeans
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current matplotlib and sklearn APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: fill_betweenx for silhouette bars, axvline for reference, proper
+          ax methods
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/plotly.yaml b/plots/silhouette-basic/metadata/plotly.yaml
index 9db2c862c6..9032e0b16a 100644
--- a/plots/silhouette-basic/metadata/plotly.yaml
+++ b/plots/silhouette-basic/metadata/plotly.yaml
@@ -28,3 +28,178 @@ review:
     the full range of the metric
   - Could leverage more Plotly-specific interactive features like custom hover data
     showing sample indices
+  image_description: 'The plot displays a silhouette plot with three horizontal bar
+    clusters representing K-means clustering results on the Iris dataset. The clusters
+    are colored in Python blue (#306998) for Cluster 0 at the bottom, bright yellow
+    (#FFD43B) for Cluster 1 in the middle, and coral red (#E84A5F) for Cluster 2 at
+    the top. Each cluster shows sorted horizontal bars representing individual sample
+    silhouette coefficients. A red dashed vertical line indicates the average silhouette
+    score (0.553) with a label at the top. The x-axis shows "Silhouette Coefficient"
+    ranging from -0.2 to 1.0, and the y-axis shows "Samples (grouped by cluster)"
+    with cluster labels on the left side. The legend in the upper right shows each
+    cluster with its average score (Cluster 0: 0.42, Cluster 1: 0.80, Cluster 2: 0.45).
+    The title follows the correct format: "silhouette-basic · plotly · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, cluster labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible, though some very thin bars
+          at low values are slightly hard to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Silhouette Coefficient" is descriptive but no units (coefficients
+          are unitless, so acceptable)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but legend placement overlaps slightly with the average
+          line annotation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct silhouette plot with horizontal bars grouped by cluster
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette values correctly mapped to x-axis, samples to y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal bars sorted within clusters, distinct colors, vertical
+          average line, cluster annotations
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis range [-0.2, 1.0] shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows cluster names with average scores
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "silhouette-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in cluster quality (Cluster 1 well-separated at 0.80,
+          Clusters 0 and 2 moderate at 0.42-0.45), but no negative silhouette values
+          visible which would demonstrate misclassification
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is a classic, real-world clustering example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Silhouette scores are realistic for K-means on Iris, though the absence
+          of negative values limits educational value
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for KMeans
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Plotly and sklearn APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses go.Bar and basic Plotly features, but misses opportunity for
+          interactive hover enhancements beyond basic template, could use subplot
+          annotations more effectively
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/plotnine.yaml b/plots/silhouette-basic/metadata/plotnine.yaml
index 7d889b5f80..052dfa58bf 100644
--- a/plots/silhouette-basic/metadata/plotnine.yaml
+++ b/plots/silhouette-basic/metadata/plotnine.yaml
@@ -22,3 +22,181 @@ review:
   weaknesses:
   - 'Minor: annotation positioning extends into negative x territory'
   - No negative silhouette values shown to demonstrate misclassification aspect
+  image_description: 'The plot displays a silhouette plot with three clusters (Cluster
+    0, 1, and 2) from the iris dataset clustering. Horizontal bars represent individual
+    samples'' silhouette coefficients, sorted within each cluster. Cluster 0 (blue)
+    is at the bottom with avg: 0.42, Cluster 1 (yellow/gold) is in the middle with
+    avg: 0.80, and Cluster 2 (green) is at the top with avg: 0.45. A vertical red
+    dashed line marks the overall average silhouette score (0.55) with a label "Avg:
+    0.55". The title follows the correct format: "silhouette-basic · plotnine · pyplots.ai".
+    The x-axis shows "Silhouette Coefficient" (0.0 to 0.8 visible range), and the
+    y-axis is labeled "Sample Index (sorted within cluster)" but tick marks are hidden.
+    Each cluster is annotated with its name and average score on the left side. A
+    legend on the right identifies the three clusters by color.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, axis text at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, cluster annotations well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Horizontal bars clearly visible with good segment size (1.5), minor:
+          bars could be slightly thicker for better visual impact'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Green palette is colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Silhouette Coefficient" and "Sample Index (sorted
+          within cluster)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Grid is appropriately subtle, with y-axis grid lines removed, which
+          is good; legend well placed; minor: cluster annotations could be positioned
+          slightly better to avoid extending into negative x territory'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct silhouette plot with horizontal bars for each sample
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette values correctly mapped to bar length, samples sorted
+          within clusters
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: horizontal bars, cluster grouping, vertical
+          average line, cluster annotations with averages'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows appropriate range (-0.25 to 1.0) covering all silhouette
+          values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies clusters by color
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "silhouette-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variation in cluster quality (Cluster 1 much better than 0
+          and 2), sorted samples within clusters; minor: no negative silhouette values
+          visible which would demonstrate misclassification'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses real iris dataset with K-means clustering - a classic, meaningful
+          example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Silhouette scores in valid range (0 to 1), realistic cluster averages
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → clustering → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and random_state=42 for KMeans
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern sklearn and plotnine APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s ggplot2 grammar: geom_segment for horizontal
+          bars, geom_vline for average line, geom_text for annotations, scale_color_manual
+          for custom colors, theme customization with element_blank for y-axis'
+  verdict: APPROVED
diff --git a/plots/silhouette-basic/metadata/seaborn.yaml b/plots/silhouette-basic/metadata/seaborn.yaml
index 27a30e628d..e5a0db203e 100644
--- a/plots/silhouette-basic/metadata/seaborn.yaml
+++ b/plots/silhouette-basic/metadata/seaborn.yaml
@@ -28,3 +28,180 @@ review:
     barh with seaborn styling)
   - All silhouette scores are positive; demonstrating some negative scores would better
     showcase the plot's ability to identify misclassified samples
+  image_description: The plot displays a silhouette plot with 3 clusters visualized
+    using horizontal bars. Cluster 0 (steel blue) is at the bottom with samples ranging
+    from ~0.05 to ~0.55 silhouette scores. Cluster 1 (golden yellow) in the middle
+    shows the best clustering with scores ranging from ~0.55 to ~0.88. Cluster 2 (green)
+    at the top has moderate scores from ~0.30 to ~0.70. A red dashed vertical line
+    marks the overall average silhouette score at 0.553. Per-cluster averages are
+    annotated on the right side (0.417, 0.798, 0.451). The title correctly displays
+    "silhouette-basic · seaborn · pyplots.ai". X-axis shows "Silhouette Coefficient"
+    (0.0-1.0), Y-axis shows "Samples (grouped by cluster)". A legend in the lower
+    right identifies each cluster and the average line.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis labels 20pt, tick labels 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean separation between clusters
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Horizontal bars perfectly adapted with alpha=0.8, height=1.0 for
+          dense display
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/green palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good canvas utilization but per-cluster annotations positioned at
+          fixed x=0.92 rather than dynamically; slight imbalance with large right
+          margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (coefficient is unitless, so acceptable)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3, legend well placed; however, the x-axis-only
+          grid could be improved with subtle horizontal reference lines
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct silhouette plot with horizontal bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Samples on Y-axis, silhouette coefficient on X-axis, correctly grouped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: sorted bars within clusters, distinct
+          colors, vertical average line, per-cluster annotations'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows -0.1 to 1.0 covering full coefficient range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies clusters and average line
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exact format: "silhouette-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation between clusters (good vs poor clustering), some
+          low values near 0, but could show more dramatic misclassification
+          cases (negative scores)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Iris dataset is the classic example for clustering validation
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Good silhouette values (0.05-0.88), though all positive; real-world
+          often has some negative values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) and random_state=42 for KMeans
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses n_init=10 explicitly which is good, but annotation xy parameter
+          uses raw coordinates (0.92) instead of transform
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.despine() for seaborn styling, but core plotting uses matplotlib
+          barh directly rather than seaborn plotting functions; this is necessary
+          since seaborn doesn't have a native silhouette plot function, but limits
+          seaborn's distinctive contribution
+  verdict: APPROVED
diff --git a/plots/slider-control-basic/metadata/altair.yaml b/plots/slider-control-basic/metadata/altair.yaml
index bd28502631..60b3f18485 100644
--- a/plots/slider-control-basic/metadata/altair.yaml
+++ b/plots/slider-control-basic/metadata/altair.yaml
@@ -22,3 +22,173 @@ review:
   - Y-axis scale starts at 0 but data ranges from 140-190 creating unused space at
     bottom
   - Current slider value not prominently displayed near the slider as spec suggests
+  image_description: 'The plot displays a bar chart showing monthly sales data across
+    12 months (Jan-Dec). The bars are colored in a pleasant blue (#306998) with slight
+    transparency. The Y-axis shows "Sales (thousands $)" ranging from 0 to 200, while
+    the X-axis displays month abbreviations. The title "slider-control-basic · altair
+    · pyplots.ai" is prominently displayed at the top with a helpful subtitle "Use
+    the Year slider to filter monthly sales data". The data shows a clear seasonal
+    pattern with peaks in March (~190) and June (~186), and troughs in September (~144).
+    The static PNG captures year 2023 data (the slider''s default value). Note: The
+    slider widget is not visible in the static PNG but would be present in the interactive
+    HTML output.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 18-22pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, month labels well-spaced with labelAngle=0
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar size=40 appropriate for 12 categories, opacity=0.8 good
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some empty space at bottom (Y starts at 0
+          but data is 140-190)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Sales (thousands $)" and "Month" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed (single series), grid subtle with alpha=0.3
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive bar chart with slider control
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Month on X, Sales on Y, Year as filter parameter
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Slider present with min/max, but current value display relies on
+          tooltip rather than prominent display
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed scale domain=[0, 200] shows all data consistently across years
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, tooltips provide context
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but subtitle could be more prominent
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal variation and year-over-year growth, but pattern is
+          similar across years
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales data is a perfect real-world scenario for year-based
+          filtering
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 100-200K are realistic for business context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current selection_point API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of selection_point with binding_range for interactivity,
+          tooltips, and declarative encoding. Could use additional Altair features
+          like text labels or conditional formatting.
+  verdict: APPROVED
diff --git a/plots/slider-control-basic/metadata/bokeh.yaml b/plots/slider-control-basic/metadata/bokeh.yaml
index d4b826244d..71b4deca60 100644
--- a/plots/slider-control-basic/metadata/bokeh.yaml
+++ b/plots/slider-control-basic/metadata/bokeh.yaml
@@ -25,3 +25,179 @@ review:
   - Missing HoverTool which would enhance the interactive experience by showing exact
     values on hover
   - The interactive note annotation could be more visually integrated
+  image_description: The plot displays a bar chart showing monthly sales data for
+    2024. Blue vertical bars (#306998) represent sales values for each month (Jan-Dec)
+    on the x-axis, with values ranging from approximately 70 to 140 thousand USD on
+    the y-axis. A yellow/gold trend line (#FFD43B) with circular markers connects
+    the tops of the bars, showing the overall trend. The title reads "Monthly Sales
+    (2024) · slider-control-basic · bokeh · pyplots.ai" at the top left. The y-axis
+    is labeled "Sales (thousands USD)" and the x-axis is labeled "Month". A legend
+    appears on the right side showing "Monthly Sales" and "Trend Line". There's also
+    a subtle note in the upper right area indicating "[Interactive] Use slider to
+    change year (2018-2024)". The background is light gray (#fafafa) with subtle horizontal
+    grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 24pt - all clearly
+          readable; a slightly smaller title would be more balanced
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized at 0.7 width, trend points at size 20, appropriate
+          for 12 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills majority of space, though right
+          margin has some extra space due to legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Sales (thousands USD)" and "Month"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is well-placed but there appears to be rendering artifacts/glitches
+          near the legend area on the right side
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Interactive plot with slider control - correct implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (categorical), Y=sales values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Slider widget with year selection (2018-2024), real-time updates
+          via CustomJS callback, clear min/max labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (0-180) accommodates all data properly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Monthly Sales" (bars) and "Trend Line"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Monthly Sales (2024) · slider-control-basic
+          · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal patterns, growth trend across years, monthly variation;
+          could show more dramatic year-over-year differences
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales data is a neutral, realistic business scenario with plausible
+          seasonal patterns (holiday bump in Dec)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 50-140k USD for monthly sales are realistic, though the growth
+          trend could be slightly more pronounced
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh 3.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct for interactive)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ColumnDataSource, CustomJS callbacks, Slider widget,
+          and export_png. Could leverage more Bokeh features like HoverTool for data
+          inspection.
+  verdict: APPROVED
diff --git a/plots/slider-control-basic/metadata/highcharts.yaml b/plots/slider-control-basic/metadata/highcharts.yaml
index 1144fc9b6f..b53c57ae95 100644
--- a/plots/slider-control-basic/metadata/highcharts.yaml
+++ b/plots/slider-control-basic/metadata/highcharts.yaml
@@ -27,3 +27,173 @@ review:
     is noticeable)
   - The line color shown (brownish-red) differs from the blue (#306998) assigned to
     2023 - appears series colors are not rendering as expected
+  image_description: The plot displays a line chart showing monthly sales data (January
+    through December) for the year 2023. The chart uses a brownish-red line color
+    (#8C564B) with circular markers at each data point. The Y-axis shows "Sales (thousands
+    USD)" ranging from 0 to 310, with clean gridlines. The X-axis shows month labels.
+    A slider control widget is visible at the bottom with a gradient track (blue to
+    yellow) allowing selection between years 2019-2023, with "2023" prominently displayed
+    below. The title follows the correct format "slider-control-basic · highcharts
+    · pyplots.ai" with a subtitle explaining the interaction.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title is large and bold, axis labels
+          are appropriately sized, tick marks are legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean layout throughout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width and marker sizes are appropriate for the data density
+          (12 points)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette; the brownish color is distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but slider widget slightly compressed at bottom;
+          minor canvas utilization issue
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Sales (thousands USD)", X-axis has "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is well styled but legend is disabled; should show which year
+          is currently displayed in legend area
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart type for time series data
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Months on X, Sales on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has slider with min/max labels and current value display; missing
+          play/pause animation (noted as optional in spec)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range starting from 0
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (using slider instead of legend)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "slider-control-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows monthly sales pattern with seasonal variation; demonstrates
+          year filtering well but only one year visible at a time
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly sales data is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values (120K-290K range) are realistic for business data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → config → HTML → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png (correct) but also creates plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts series show/hide functionality with custom HTML slider;
+          could leverage built-in Highcharts range selector or input controls for
+          tighter integration
+  verdict: APPROVED
diff --git a/plots/slider-control-basic/metadata/matplotlib.yaml b/plots/slider-control-basic/metadata/matplotlib.yaml
index d909abbf62..dfad9a4414 100644
--- a/plots/slider-control-basic/metadata/matplotlib.yaml
+++ b/plots/slider-control-basic/metadata/matplotlib.yaml
@@ -23,3 +23,176 @@ review:
   - Update function uses global fill variable which is not ideal but acceptable for
     widget callbacks
   - Play/pause animation feature mentioned in spec is not implemented
+  image_description: 'The plot displays a line chart showing monthly temperature data
+    for the year 2015. The chart has a blue line (#306998) connecting 12 data points
+    representing each month (Jan-Dec), with yellow/gold (#FFD43B) circular markers
+    at each point. There''s a light blue semi-transparent fill area under the curve.
+    The title reads "slider-control-basic · matplotlib · pyplots.ai" in bold at the
+    top. The y-axis shows "Temperature (°C)" ranging from -5 to 35, and the x-axis
+    shows "Month" with abbreviated month names. A prominent yellow badge in the upper
+    right displays "Year: 2015". At the bottom, there''s a slider widget labeled "Year"
+    with the current value "2015" shown on the right side. The data shows a realistic
+    seasonal temperature pattern with winter lows (~2-3°C) and a summer peak in July
+    (~29°C).'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at 12 (appropriate for 12 points), linewidth 3
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe and high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, slider well positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Temperature (°C)" includes units, "Month" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate (alpha=0.3), but no legend needed/present; points
+          deducted as the year indicator serves as a quasi-legend
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive plot with slider widget
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months, Y=temperature, parameter=year correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Slider with min/max, current value display, update function. Minor:
+          spec suggests play/pause animation which is not included (-1)'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis range -5 to 35 covers all values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Year badge serves as clear parameter indicator, no other legend needed
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Format is correct but lacks subtitle or additional context (-1)
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows seasonal pattern, multiple years (10 years: 2015-2024), warming
+          trend'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Monthly temperature data is realistic and neutral; generic location
+          (-1)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (-5 to 35°C range for temperate
+          climate)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Has a function definition (`update`) which is necessary for widget
+          but violates pure KISS (-2)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` is set'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses `matplotlib.widgets.Slider` which is the core feature for this
+          spec, plus `fill_between` and custom styling. Could use more advanced features
+          like animation.
+  verdict: APPROVED
diff --git a/plots/slider-control-basic/metadata/plotly.yaml b/plots/slider-control-basic/metadata/plotly.yaml
index 04eb39eab9..f6fde7bf66 100644
--- a/plots/slider-control-basic/metadata/plotly.yaml
+++ b/plots/slider-control-basic/metadata/plotly.yaml
@@ -26,3 +26,175 @@ review:
     (alpha 0.2-0.3)
   - Spec mentions play/pause animation option if supported - Plotly supports this
     but it was not implemented
+  image_description: 'The plot displays a bar chart showing "Quarterly Sales Performance
+    · 2019" with the subtitle "slider-control-basic · plotly · pyplots.ai". Four blue
+    bars (#306998) represent Q1-Q4 sales data with values labeled above each bar ($107K,
+    $146K, $161K, $143K). The Y-axis shows "Sales ($ Thousands)" ranging from 0-250,
+    and the X-axis shows "Quarter" with labels Q1-Q4. Below the main chart is a horizontal
+    slider widget with year labels from 2019 to 2024, showing "Year: 2019" as the
+    current selection. The slider has a circular handle on 2019 and tick marks for
+    each year. The background is white with a clean, professional appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 16-18pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, bar labels positioned cleanly outside
+          bars
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good width (0.6), clear borders, data labels
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but bottom margin is large due to slider placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales ($ Thousands)", "Quarter"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines in the image (gridcolor set but very faint),
+          no legend needed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct interactive bar chart with slider control
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Quarters, Y=Sales, Parameter=Year correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Slider with clear min/max labels (2019-2024), current value display
+          ("Year: 2019")'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [0, 280] covers all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend present (showlegend=False), though not strictly needed
+          for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "slider-control-basic · plotly · pyplots.ai" in
+          subtitle'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows seasonal pattern (Q3 peak), yearly growth trend, multiple years
+          of data
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales data is a real, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values ($107K-$161K) are realistic for business quarterly data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set for reproducible data generation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Plotly's slider widget with currentvalue display, update
+          method, and styled slider. Could have included animation/play button for
+          extra credit.
+  verdict: APPROVED
diff --git a/plots/slider-control-basic/metadata/pygal.yaml b/plots/slider-control-basic/metadata/pygal.yaml
index 734ba9c5b1..d3d55569b8 100644
--- a/plots/slider-control-basic/metadata/pygal.yaml
+++ b/plots/slider-control-basic/metadata/pygal.yaml
@@ -27,3 +27,170 @@ review:
     with slider indication - fundamentally misrepresents the slider-control spec
   - Static PNG cannot demonstrate the core slider functionality that defines this
     plot type
+  image_description: 'The plot displays a grouped bar chart titled "Quarterly Sales
+    by Year · slider-control-basic · pygal · pyplots.ai". The x-axis shows quarters
+    (Q1, Q2, Q3, Q4) and the y-axis shows "Sales (thousands USD)" ranging from $0K
+    to $200K+. Five color-coded series represent years 2020-2024: blue (2020), yellow
+    (2021), darker blue (2022), muted yellow-green (2023), and gray (2024). Each quarter
+    shows 5 bars grouped together, demonstrating year-over-year growth with Q4 having
+    the highest values. The legend appears at the bottom with labels for each year.'
+  criteria_checklist:
+    visual_quality:
+      score: 32
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Text is readable but legend text at bottom is relatively small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Bars are visible but grouped bars are somewhat crowded
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but 2022 blue and 2020 blue are similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Clear labels with units ("Sales (thousands USD)", "Quarter")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, but legend at bottom is far from data
+    spec_compliance:
+      score: 10
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 3
+        max: 8
+        passed: false
+        comment: '**MAJOR ISSUE**: PNG shows grouped bar chart with ALL years, NOT
+          a slider-controlled single-year view. The spec requires "slider widget that
+          allows users to dynamically control a parameter" - PNG cannot demonstrate
+          slider functionality'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (quarters vs sales)
+      - id: SC-03
+        name: Required Features
+        score: 0
+        max: 5
+        passed: false
+        comment: Slider control not visible in PNG output; spec requires "slider should
+          have clear min/max labels and current value display"
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: false
+        comment: Shows "Year XXXX" but not the slider parameter value
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format `slider-control-basic · pygal · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows quarterly data across years with seasonal patterns and growth
+          trend
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales data is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $80K-$200K range are plausible for quarterly sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear flow with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of pygal's custom Style, value_formatter, SVG rendering
+          for HTML, and tooltips
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/altair.yaml b/plots/slope-basic/metadata/altair.yaml
index db72dfadd4..0f24511283 100644
--- a/plots/slope-basic/metadata/altair.yaml
+++ b/plots/slope-basic/metadata/altair.yaml
@@ -15,3 +15,14 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot shows a slope chart (slopegraph) comparing product
+    sales between Q1 and Q4. Ten products are displayed with lines connecting their
+    Q1 values (left) to Q4 values (right). Lines are color-coded: **blue (#306998)**
+    for products that increased (Laptop, Tablet, Monitor, Mouse, Headphones, Webcam)
+    and **yellow (#FFD43B)** for products that decreased (Phone, Keyboard, Charger,
+    Speaker). Each endpoint has a circular marker and labels on both sides identifying
+    the product. The Y-axis shows "Sales (units)" ranging from ~100 to 1200. The X-axis
+    displays "Q1 Sales" and "Q4 Sales" as period labels. A legend in the top-right
+    shows "Direction" with Increase/Decrease categories. The title reads "slope-basic
+    · altair · pyplots.ai". A subtle dashed grid helps with reading values.'
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/bokeh.yaml b/plots/slope-basic/metadata/bokeh.yaml
index 5edd219bb1..03fd69ba51 100644
--- a/plots/slope-basic/metadata/bokeh.yaml
+++ b/plots/slope-basic/metadata/bokeh.yaml
@@ -26,3 +26,180 @@ review:
     version
   - Label text could be slightly larger (20pt instead of 18pt) for optimal readability
     at the target resolution
+  image_description: 'The plot displays a slope chart comparing 10 products'' sales
+    between Q1 and Q4. Blue lines (Python Blue #306998) indicate products with increased
+    sales, while yellow/gold lines (Python Yellow #FFD43B) indicate decreased sales.
+    Each product has labels at both endpoints showing the product name and value (e.g.,
+    "Product A: 85" on the left, "92: Product A" on the right). The time points "Q1"
+    and "Q4" appear at the bottom. The Y-axis is labeled "Sales (thousands)" with
+    values ranging from ~30 to 100. The title "slope-basic · bokeh · pyplots.ai" is
+    centered at the top. The chart effectively shows the direction and magnitude of
+    change for each product.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable at full size. Title at 32pt, axis labels at
+          22pt, tick labels at 18pt, endpoint labels at 18pt. Slightly smaller than
+          ideal for some elements.
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor potential for overlap when values are close (e.g., Products
+          B and G at Q1 are at 72 and 78), but generally well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines are 4px wide with 0.8 alpha, markers are size 18 - excellent
+          visibility for 10 entities.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe (not red-green), good contrast. Could
+          be slightly better with a third distinct color for no-change scenario.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot is well-centered with appropriate margins
+          for labels.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales (thousands)" which is descriptive with units. X-axis
+          uses custom labels via Label annotations, which is appropriate for this chart type.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid with 0.3 alpha, no legend needed as colors are
+          self-explanatory via line direction.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart/slopegraph implementation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entities on lines, values correctly mapped to Y positions at both
+          time points.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels at both endpoints ✓, color coding by direction ✓, time point
+          labels ✓, 10 entities (within 5-15 range) ✓.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-range 25-105 shows all data points clearly.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No explicit legend needed; color meaning is intuitive (blue=up, yellow=down).
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "slope-basic · bokeh · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both increases and decreases, varying magnitudes. Could show
+          more dramatic rank changes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales Q1 vs Q4 is a realistic business scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sales values 40-95 are plausible. Using "thousands" unit makes sense.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → figure → lines/labels → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded); no random seed is set because no
+          randomness is used. Acceptable, but the code could note this explicitly.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports are used: figure, export_png, save, Label, CDN.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's Label model for custom positioning, figure for plotting,
+          and both PNG export and HTML save for interactivity. Could leverage more
+          Bokeh-specific features like HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/highcharts.yaml b/plots/slope-basic/metadata/highcharts.yaml
index a9e02967aa..47fae6ce57 100644
--- a/plots/slope-basic/metadata/highcharts.yaml
+++ b/plots/slope-basic/metadata/highcharts.yaml
@@ -26,3 +26,175 @@ review:
     better show the slope differences
   - Grid lines could be more subtle (lower opacity) to avoid competing with data
   - No explicit comment about deterministic data (minor)
+  image_description: 'The plot displays a slope chart (slopegraph) comparing product
+    sales between Q1 and Q4. Eight products (A through H) are shown with lines connecting
+    their Q1 values (left) to Q4 values (right). Blue lines (#306998) indicate products
+    that increased in sales (A, C, D, F, G), while yellow/gold lines (#FFD43B) indicate
+    products that decreased (B, E, H). Each endpoint has a circular marker with labels
+    showing the product name and value (e.g., "Product A: 85" on the left, "Product
+    A: 110" on the right). The title "slope-basic · highcharts · pyplots.ai" appears
+    at the top in bold, with subtitle "Product Sales: Q1 vs Q4 (thousands)" below.
+    The Y-axis is labeled "Sales (thousands)" ranging from 0 to 160 with dashed grid
+    lines. The X-axis shows "Q1" and "Q4" labels at the bottom.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable, good font sizes for 4800x2700
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels are well-positioned at endpoints
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Lines are thick (lineWidth: 6), markers large (radius: 16), excellent
+          visibility'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe (not red-green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with appropriate margins, though Y-axis starts at 0 when
+          data starts around 45
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Y-axis has descriptive label with units: "Sales (thousands)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is dashed which is good, but legend is disabled; for slope charts
+          without legend this is acceptable but loses 1pt; grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart implementation connecting two time points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Q1 and Q4 values correctly mapped to X positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has all spec features: endpoint labels, color-coded by direction,
+          vertical axes labeled'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within chart range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for slope chart (legend disabled, entity names in labels)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "slope-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both increases and decreases, variety of magnitudes, but could
+          show rank crossings more clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales Q1 vs Q4 is a plausible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in thousands (45-150) are realistic, though starting Y at
+          0 wastes some space
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart config → series → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts line series with data labels, but could leverage more
+          advanced features such as animation or tooltips (visible in the HTML output only)
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/letsplot.yaml b/plots/slope-basic/metadata/letsplot.yaml
index 1847598177..7b7b14989d 100644
--- a/plots/slope-basic/metadata/letsplot.yaml
+++ b/plots/slope-basic/metadata/letsplot.yaml
@@ -24,3 +24,180 @@ review:
   - Color palette (blue/red) is not fully colorblind-safe; consider using a more accessible
     combination
   - Horizontal grid lines could be more subtle with lower alpha
+  image_description: The plot shows a slope chart comparing 10 products (A-J) between
+    Q1 and Q4 sales. Two vertical axes are positioned at x=0 (Q1 Sales) and x=1 (Q4
+    Sales) with values ranging from ~50 to ~220 ($K). Each product is represented
+    by a line connecting its Q1 and Q4 values, with colored endpoints (blue circles
+    for increases, red circles for decreases). Product labels appear on both sides
+    of the chart. Blue lines indicate products that increased (Product A, C, E, F,
+    G, I) and red lines show decreases (Product B, D, H, J). The legend labeled "Change"
+    is positioned on the right side showing "Increase" (blue) and "Decrease" (red).
+    The title "slope-basic · letsplot · pyplots.ai" appears at the top. The background
+    is minimal with subtle horizontal gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, product labels, and legend are all clearly readable
+          at proper sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor potential for overlap with Product E/F labels on right side
+          at 185/215, but readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line thickness (2.5) and point size (7) are well-suited for 10 entities;
+          good alpha (0.85)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Blue (#306998) and red (#DC2626) have good contrast but red-blue
+          is not optimal for colorblindness
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with proper margins for labels; x-axis limits
+          [-0.6, 1.6] provide space for labels
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales ($K)" with units; X-axis shows "Q1 Sales ($K)"
+          and "Q4 Sales ($K)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed; vertical grid lines removed (good), but horizontal
+          grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart/slopegraph implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X correctly maps time periods, Y correctly maps values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels at both endpoints, color coding by direction, time point labels
+          on axes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Increase" and "Decrease" with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "slope-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both increases and decreases, various magnitudes of change,
+          but data is carefully curated to avoid overlap rather than showing realistic
+          patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Sales comparison is plausible; Q1 vs Q4 is reasonable, but "Product
+          A-J" is generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 55-215K are realistic for quarterly product sales
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but could be more compact
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar (geom_segment, geom_point, geom_text, scale_color_manual,
+          theme_minimal), but doesn't leverage lets-plot's unique interactive capabilities
+          in the PNG output
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/matplotlib.yaml b/plots/slope-basic/metadata/matplotlib.yaml
index 111d86762c..b4cfd4a876 100644
--- a/plots/slope-basic/metadata/matplotlib.yaml
+++ b/plots/slope-basic/metadata/matplotlib.yaml
@@ -26,3 +26,178 @@ review:
     or integrated differently
   - Right-side labels only show values without product names, requiring visual line-following
     to identify products
+  image_description: 'The plot displays a slope chart (slopegraph) with 8 products
+    comparing Q1 2024 vs Q4 2024 sales figures in millions of dollars. Blue lines
+    represent products with increased sales, while yellow/gold lines show decreases.
+    Products are labeled on the left side with their name and Q1 value (e.g., "Product
+    C: $15.2M"), and the right side shows Q4 values (e.g., "$18.5M"). The title "slope-basic
+    · matplotlib · pyplots.ai" appears at the top. A legend in the upper right distinguishes
+    "Increase" (blue) from "Decrease" (yellow). The Y-axis shows "Sales (Millions
+    $)" ranging from ~6 to 18, with horizontal gridlines. Time periods "Q1 2024" and
+    "Q4 2024" are bold labels at the bottom.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, Y-axis label at 20pt, X-axis labels at 20pt bold,
+          tick labels at 16pt, data labels at 14pt bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Label collision avoidance algorithm works well, no overlapping text
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are size 12 with linewidth 3, perfectly visible for 8 data
+          entities
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998) vs Yellow (#FFD43B) is colorblind-safe; yellow could
+          have slightly better contrast on white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with margins allowing labels; slight excess
+          whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales (Millions $)" with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha 0.3 with dashed style, but legend placement
+          in upper right overlaps potential data space and feels slightly disconnected
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart/slopegraph connecting two time points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entities on left, values at start/end correctly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels at both endpoints, color coding by direction (increase/decrease),
+          vertical axes labeled with time points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 8 products visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Increase (blue) and Decrease (yellow)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Follows exact format: "slope-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows both increases and decreases (4 each), varying magnitudes of
+          change. Could show more dramatic contrasts or crossing lines
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product sales comparison between quarters is a classic, believable
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales figures from $6M to $18M are realistic for product quarterly
+          sales
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → processing → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Line2D import is used but could import from matplotlib.lines directly
+          at top level
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib properly with ax methods, Line2D for custom legend,
+          spines manipulation. Could leverage more advanced features like annotations
+          with arrows or ConnectionPatch
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/plotly.yaml b/plots/slope-basic/metadata/plotly.yaml
index f72b028e55..72df645b43 100644
--- a/plots/slope-basic/metadata/plotly.yaml
+++ b/plots/slope-basic/metadata/plotly.yaml
@@ -26,3 +26,180 @@ review:
     decrease)
   - Some endpoint labels are positioned close together and could benefit from slight
     vertical adjustment to prevent near-overlap
+  image_description: 'The plot displays a slope chart (slopegraph) comparing product
+    sales between Q1 2024 and Q4 2024 for 10 tech products. Blue lines (#306998 -
+    Python Blue) represent products with sales increases (7 products: Laptop Pro,
+    Wireless Earbuds, Smart Watch, Gaming Mouse, Mechanical Keyboard, Webcam HD, USB
+    Hub). Yellow/gold lines (#FFD43B - Python Yellow) represent products with sales
+    decreases (3 products: Tablet Ultra, Portable SSD, Monitor Stand). Each product
+    has labels at both endpoints showing the product name and sales value in $K. The
+    chart uses a clean white template with subtle horizontal grid lines. The title
+    "Product Sales Q1 vs Q4 · slope-basic · plotly · pyplots.ai" is centered at the
+    top. The Y-axis displays "Sales ($K)" ranging from approximately 30 to 310. The
+    X-axis shows the two time points "Q1 2024" and "Q4 2024".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, annotations
+          at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Most labels are readable, but some endpoint labels are slightly close
+          together (e.g., Smart Watch/Tablet Ultra on left, Gaming Mouse/Mechanical
+          Keyboard on right)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Lines have good width (3), markers are sized appropriately (14),
+          clear visual hierarchy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast against
+          white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of margins (200px on sides), plot fills canvas well, but
+          could use slightly more vertical space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales ($K)" with units, X-axis shows time period names
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), but no legend explaining blue=increase,
+          yellow=decrease
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart (slopegraph) implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entity names as labels, values at start/end correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels at both endpoints, color coding by direction (increase vs
+          decrease), vertical axes labeled with time points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No traditional legend but color coding is consistent and understandable
+          from context
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Product Sales Q1 vs Q4 · slope-basic · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both increases (majority) and decreases (3 products), varied
+          magnitudes of change, demonstrates rank changes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech product sales Q1 vs Q4 is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in $30K-$310K range are realistic for product lines
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random data used, and data is hardcoded (acceptable, since deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Plotly's annotation system, hover templates for interactivity,
+          but could leverage more Plotly-specific features
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/plotnine.yaml b/plots/slope-basic/metadata/plotnine.yaml
index 87fb72f8ad..fddf007470 100644
--- a/plots/slope-basic/metadata/plotnine.yaml
+++ b/plots/slope-basic/metadata/plotnine.yaml
@@ -23,3 +23,182 @@ review:
   - Minor label overlap around similar values (Product J 180 / Product F 175)
   - Right-side labels could include product names for very long charts
   - Legend could be positioned differently to save horizontal space
+  image_description: The plot displays a slope chart (slopegraph) comparing 10 products'
+    sales between Q1 and Q4. Lines connect values at two time points, with blue (#306998)
+    indicating increases and yellow (#FFD43B) indicating decreases. The left side
+    shows product names with Q1 values in parentheses (e.g., "Product C (200)"), while
+    the right side shows only Q4 values. The y-axis is labeled "Sales (thousands $)"
+    ranging from approximately 65 to 230. X-axis shows "Q1" and "Q4" labels. A legend
+    titled "Change Direction" appears on the right side. The layout is clean with
+    minimal theme and removed vertical gridlines. There is some minor label overlapping
+    visible around the 170-180 range where Product J and Product F are close together.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 18pt, legend text 16-18pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor overlap between Product J (180) and Product F (175) labels
+          on the left side; values 165, 160, 155 are close on the right but still
+          readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points sized at 5, lines at 1.5 with alpha 0.8 - excellent visibility
+          for 10 entities
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-friendly (not red-green), though contrast
+          could be slightly higher
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Sales (thousands $)" with units, x-axis appropriately
+          empty (time points shown as tick labels)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is minimal (only horizontal lines, alpha appears subtle), but
+          legend could be positioned better (takes up space on the right)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart with lines connecting two time points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entities on left, values correctly mapped to y-axis, time points
+          on x-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels at both endpoints, color coding by direction, vertical axes
+          labeled with time point names (Q1, Q4)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points without clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Increase" and "Decrease" with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "slope-basic · plotnine · pyplots.ai" format correctly
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both increases and decreases, various magnitudes of change,
+          but could show rank crossings more clearly
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly sales comparison is a realistic business scenario mentioned
+          in spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 65-230 thousand are realistic for product sales, though the
+          range could be tighter for better visual differentiation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Uses deterministic data (hardcoded values), but no random seed since
+          not using random data - acceptable
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine's grammar of graphics (ggplot, aes, geom_line,
+          geom_point, geom_text), scale_color_manual, theme customization. Could utilize
+          faceting or other advanced features, but appropriate for this simple plot
+          type.
+  verdict: APPROVED
diff --git a/plots/slope-basic/metadata/seaborn.yaml b/plots/slope-basic/metadata/seaborn.yaml
index fb0d9db518..03b9a4062b 100644
--- a/plots/slope-basic/metadata/seaborn.yaml
+++ b/plots/slope-basic/metadata/seaborn.yaml
@@ -24,3 +24,177 @@ review:
   - Seaborn lineplot used in a loop rather than leveraging native grouping capabilities
   - Could benefit from more dramatic rank changes in the data to better showcase slope
     chart strength
+  image_description: The plot displays a slope chart comparing Q1 vs Q4 revenue for
+    8 tech companies. Lines connect data points between two vertical axes labeled
+    "Q1 Revenue ($M)" and "Q4 Revenue ($M)". Blue (#306998) lines indicate revenue
+    increases (TechCorp, DataFlow, NetWorks, CodeBase, LogicPro - 5 companies), while
+    yellow (#FFD43B) lines show decreases (CloudNine, ByteSize, SoftEdge - 3 companies).
+    Each endpoint is labeled with the company name and value in parentheses. The title
+    "slope-basic · seaborn · pyplots.ai" appears at the top. A legend in the lower
+    right distinguishes "Increase" and "Decrease" categories. The layout has appropriate
+    margins with labels positioned clearly outside the plot area.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, axis labels 20pt, tick labels 16pt, entity labels
+          15pt bold - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 6
+        max: 8
+        passed: true
+        comment: Minor overlap between "LogicPro (125)" and "125" y-axis tick label
+          on the left side; slight crowding around 160-178 range
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 3.5 and marker size 14 are well-suited for 8 entities,
+          alpha 0.85 provides good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas with adjusted margins (left=0.22, right=0.78)
+          accommodating labels
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Revenue ($M)" with units, but x-axis labels are tick
+          labels showing period names rather than axis label
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha 0.3 with dashed lines is subtle; legend placed well but
+          could be more integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct slope chart/slopegraph connecting two time points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Entities correctly shown on both axes, values mapped to y-position
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Labels at both endpoints ✓, color coding by direction ✓, vertical
+          axes labeled with time points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with appropriate padding (8%)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly indicates Increase/Decrease
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "slope-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both increases (5) and decreases (3), good variation in magnitude;
+          could show more dramatic rank changes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech company quarterly revenue is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values ($52M-$225M) are realistic for tech companies, though
+          range is somewhat narrow
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: 'Data is deterministic but no random seed needed anyway; minor: sorting
+          changes display order'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, pandas, seaborn, Line2D)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot and sns.set_style but slope charts aren't a seaborn
+          specialty; matplotlib does heavy lifting for annotations
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/altair.yaml b/plots/span-basic/metadata/altair.yaml
index 98a63a7c25..4cc63b577d 100644
--- a/plots/span-basic/metadata/altair.yaml
+++ b/plots/span-basic/metadata/altair.yaml
@@ -26,3 +26,182 @@ review:
   - Could leverage Altair interactive features (tooltips, zoom) for HTML output
   - Warning Zone label positioned at right edge - could be better centered in the
     span region
+  image_description: 'The plot displays a stock price time series from 2007 to late
+    2009 on a 16:9 canvas. A blue line with circular markers (#306998) traces the
+    price trajectory from ~$100 up to ~$123 during 2007, then declining through 2008-2009
+    to around $85-92. Two span regions are present: (1) a **vertical yellow span**
+    covering 2008-2009 labeled "Recession Period" at the top, with dashed gold edge
+    lines marking the boundaries, and (2) a **horizontal red/salmon span** between
+    $85-95 labeled "Warning Zone" on the right side, with dashed red edge lines. The
+    title "span-basic · altair · pyplots.ai" appears at the top. Axis labels show
+    "Date" (x-axis) and "Stock Price ($)" (y-axis) with clear tick marks. A subtle
+    dashed grid is visible in the background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 28pt, axis titles 22pt, tick labels 18pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line strokeWidth=4 and point size=150 are well-sized for 36 data
+          points; slightly larger markers could improve visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line, yellow vertical span, red horizontal span - distinct colors
+          with good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight issue with the yellow span extending
+          beyond the visible recession label area on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Stock Price ($)" and "Date"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but there is no legend explaining
+          what the colored spans represent (beyond in-chart labels)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct span/highlighted region plot with both vertical and horizontal
+          spans
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis (temporal), Price on Y-axis (quantitative), spans
+          correctly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both vertical span (time period) and horizontal span (threshold zone)
+          with edge lines and labels as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis scale [60, 130] shows all data comfortably with room for annotations
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: In-chart text labels accurately describe span regions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "span-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows BOTH vertical span (recession period) AND horizontal span (threshold
+          zone); demonstrates edge lines and labels; could show additional span to
+          showcase multiple regions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price during 2008-2009 recession is a classic, immediately
+          comprehensible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Stock prices ($85-123) are realistic; recession timing accurate;
+          threshold zone values reasonable though somewhat arbitrary
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → chart layers → combine
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only imports altair, numpy, pandas - all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with scale_factor=3.0 and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's layering with `alt.layer()`, `mark_rect()` for
+          spans, `mark_rule()` for edges, `mark_text()` for labels. Uses declarative
+          encoding types (:T, :Q, :N). Could add interactivity (tooltips, selection)
+          to showcase Altair's strengths more fully.
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/bokeh.yaml b/plots/span-basic/metadata/bokeh.yaml
index dc4924dd6d..da1b4e4916 100644
--- a/plots/span-basic/metadata/bokeh.yaml
+++ b/plots/span-basic/metadata/bokeh.yaml
@@ -22,3 +22,175 @@ review:
   - Legend placement in bottom-right corner creates slight visual imbalance
   - Vertical span label Q4 Peak Season positioned at bottom of span could be more
     prominently placed
+  image_description: 'The plot displays a line chart showing monthly revenue data
+    over 24 months (labeled 1-25 on x-axis). The chart features two span annotations:
+    a **blue vertical span** covering months 10-12 labeled "Q4 Peak Season" and a
+    **yellow horizontal span** covering the revenue range 120-140 labeled "Target
+    Range". The line is blue (#306998) with circular markers at each data point. The
+    y-axis shows "Revenue (thousands $)" ranging from ~100 to ~143. The title correctly
+    displays "span-basic · bokeh · pyplots.ai" in the top-left. A legend labeled "Monthly
+    Revenue" appears in the bottom-right corner. The overlapping region of the two
+    spans creates a mixed green-gray color. Overall layout is clean with good use
+    of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          high resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width and marker sizes are appropriate; markers could be slightly
+          larger for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe; good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; minor whitespace on right side near legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Revenue (thousands $)" and "Month"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but legend placement could be
+          better integrated
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements span/highlighted region plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (revenue) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both vertical and horizontal spans present with labels and semi-transparent
+          fill
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend label matches the data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both vertical and horizontal spans with labels; demonstrates
+          overlapping spans well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Revenue tracking with Q4 peak season and target range is a realistic
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Revenue values (100-143k) are plausible; months as integers work
+          well
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's BoxAnnotation for spans, ColumnDataSource, and Label
+          for annotations; also generates interactive HTML output
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/highcharts.yaml b/plots/span-basic/metadata/highcharts.yaml
index f2436ad1bd..4f936196b1 100644
--- a/plots/span-basic/metadata/highcharts.yaml
+++ b/plots/span-basic/metadata/highcharts.yaml
@@ -25,3 +25,175 @@ review:
     with categories)
   - Stock price values are somewhat generic; could reference a specific index for
     more realism
+  image_description: |-
+    The plot displays a line chart showing stock price data over time from January 2007 to October 2010. The title "span-basic · highcharts · pyplots.ai" appears at the top in bold black text, with a subtitle "Stock Price with Recession Period Highlighted" below it. The y-axis is labeled "Stock Price ($)" ranging from 50 to 180. The x-axis shows quarterly dates from Jan 2007 to Oct 2010. A blue line with circular markers traces the stock price movement.
+
+    Two span regions are clearly visible:
+    1. A **vertical span** (light blue, ~25% opacity) highlighting the "Recession Period" from Jan 2008 to Jan 2009, with a label at the top
+    2. A **horizontal span** (light yellow, ~25% opacity) highlighting the "Below Target Price" zone from $50 to $100, with a label on the left side
+
+    The data shows the stock price declining from ~$155 peak in mid-2007 down to ~$78 during the recession (Apr 2009), then recovering to ~$135 by Oct 2010. The overlapping spans create a visual intersection where both highlighted regions meet.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 72px, labels at 48px, tick labels at 36px - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels spaced well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 6, markers radius 12 - excellent visibility for 16 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line (#306998), blue span, yellow span - colorblind-safe, no
+          red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space, plot fills majority of area, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has unit "($)", but the X-axis label "Date" is configured
+          in code yet not visible in the output (Highcharts appears to hide it
+          when categories are used)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid lines, legend positioned appropriately
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct span/plot band implementation using Highcharts plotBands
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis shows dates (categories), Y-axis shows stock prices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both vertical AND horizontal spans demonstrated with labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 50-180 shows all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Stock Price" legend label is accurate'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "span-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent! Shows BOTH vertical span (time period) AND horizontal
+          span (threshold zone), matching all spec applications
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 2008-2009 recession is a real historical event, stock price behavior
+          is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are reasonable but somewhat generic ($78-$158 range); could
+          be more specific to a real index
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Chart config → Save - clean linear structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Data is hardcoded (deterministic) but no explicit seed comment; acceptable
+          but could be clearer
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (Path, tempfile, time, urllib, selenium, highcharts)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" and "plot.html"
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts-specific plotBands feature for both x_axis
+          and y_axis, with styled labels and positioning
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/letsplot.yaml b/plots/span-basic/metadata/letsplot.yaml
index cc9a8bcf0c..0ffd3a8634 100644
--- a/plots/span-basic/metadata/letsplot.yaml
+++ b/plots/span-basic/metadata/letsplot.yaml
@@ -23,3 +23,175 @@ review:
   - Axis label for y-axis lacks units (could be "Economic Index (points)" or similar)
   - Legend title "Highlighted Region" is redundant since it only shows one category
   - Does not demonstrate horizontal span variant mentioned in spec (vertical only)
+  image_description: The plot displays a line chart showing an "Economic Index" over
+    time from 2006 to 2011. A semi-transparent yellow/gold vertical span highlights
+    the "Recession 2008-2009" period, clearly labeled at the top of the highlighted
+    region. The line is rendered in Python blue (#306998) with both line segments
+    and circular markers at each data point. The data shows a decline from ~105 in
+    2006, bottoming out around 74 during the recession, then recovering to ~119 by
+    late 2011. The title "span-basic · letsplot · pyplots.ai" appears at the top.
+    Axis labels show "Year" on x-axis and "Economic Index" on y-axis. A legend on
+    the right identifies the yellow region as "Highlighted Region - Recession Period".
+    The overall layout is clean with a light gray background and subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend are all clearly readable
+          with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and markers are appropriately sized for the data density (72
+          points); markers could be slightly smaller to reduce visual clutter
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line on yellow span provides excellent contrast; colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Year", "Economic Index") but lack units
+          for the index
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, but legend title "Highlighted Region" is somewhat
+          redundant with the legend content
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements vertical span plot with highlighted region
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values on y-axis, span correctly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Semi-transparent fill (alpha 0.25), underlying data visible, text
+          label within span region
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies the recession period
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "span-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows vertical span effectively; could demonstrate horizontal span
+          as well for complete coverage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economic recession scenario is realistic and widely understood
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Economic index values (65-125) are plausible for such an indicator
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: pd.date_range with freq="ME" is the newer syntax but may have compatibility
+          considerations
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses geom_rect for span which is standard ggplot grammar; could leverage
+          more lets-plot specific features like tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/matplotlib.yaml b/plots/span-basic/metadata/matplotlib.yaml
index 79b9aef9a9..c9958771a7 100644
--- a/plots/span-basic/metadata/matplotlib.yaml
+++ b/plots/span-basic/metadata/matplotlib.yaml
@@ -26,3 +26,178 @@ review:
   weaknesses:
   - Legend placement in upper left slightly overlaps with the beginning of the stock
     price line data
+  image_description: 'The plot displays a time series of stock prices from 2006 to
+    2016. The main data is shown as a blue line (Python blue #306998) representing
+    "Stock Price". There are two highlighted span regions: (1) a yellow vertical span
+    from 2008-2009 labeled "Recession Period" which clearly marks the financial crisis
+    era where the stock price drops significantly, and (2) a red/pink horizontal span
+    from $60-$80 labeled "Risk Zone" spanning the full width of the plot. The title
+    reads "span-basic · matplotlib · pyplots.ai" at the top. Axis labels show "Year"
+    on x-axis and "Price ($)" on y-axis. A legend in the upper left clearly identifies
+    all three elements. The grid is subtle with dashed lines. The price starts around
+    $195, peaks above $200 in 2007, drops during the recession period to around $150,
+    and stabilizes around $145-150 for the remaining years.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements (the legend/data-line overlap is scored under VQ-07)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 3 is excellent, spans are clearly visible with good
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line, yellow span, and red span are easily distinguishable,
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Year" and "Price ($)" with unit in parentheses'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is 0.3 which is good, but the legend overlaps with part
+          of the data line at the start
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements span/highlighted region plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, price on y-axis, spans correctly applied
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows both vertical span (time period) AND horizontal span (threshold
+          zone) as mentioned in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify Stock Price, Recession Period, and
+          Risk Zone
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "span-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows BOTH vertical (time-based recession period) AND horizontal
+          (value threshold) spans, demonstrating both types mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: A stock price series with a recession period is a highly realistic,
+          well-known use case. The scenario is immediately recognizable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock prices in $60-$200 range are realistic, years 2006-2016 perfectly
+          capture the 2008 financial crisis
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses matplotlib's axvspan() and axhspan() which are the idiomatic
+          way to create span regions, proper use of Axes methods as specified in library
+          rules
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/plotly.yaml b/plots/span-basic/metadata/plotly.yaml
index 251bf30131..cdd6adf8a1 100644
--- a/plots/span-basic/metadata/plotly.yaml
+++ b/plots/span-basic/metadata/plotly.yaml
@@ -26,3 +26,171 @@ review:
   - Axis labels lack units (e.g. Economic Index base=100 would be more informative)
   - HTML output could leverage plotly interactive features (hover info on spans, range
     slider) but these are optional enhancements
+  image_description: |-
+    The plot displays a line chart with an Economic Index (y-axis, range ~85-120) plotted against Year (x-axis, 2005-2014). The data is shown as a blue line with circular markers (#306998 color). Two span regions are clearly visible:
+    1. A **vertical span** (light blue, semi-transparent) marking the "Recession" period from approximately 2007.5 to 2009.5, with a text label in the top-left corner
+    2. A **horizontal span** (light yellow, semi-transparent) marking the "Target Zone" between values 105-115, also labeled in the top-left corner
+
+    The title "span-basic · plotly · pyplots.ai" is centered at the top. The legend shows "Economic Index" in the bottom-left corner with a white background. The plot uses the plotly_white template with subtle gray gridlines. The overlapping region of both spans creates a greenish tint where they intersect.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at size 32, axis titles at 24, tick fonts at 20 - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 14 and line width 4 are well sized for 10 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line and yellow/blue spans are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good margins (100px all sides, 120 top), plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels "Year" and "Economic Index" but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.1), legend well placed with semi-transparent
+          background
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct span plot with both vertical and horizontal spans
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Years on x-axis, economic values on y-axis, spans correctly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows both vertical span (time period) and horizontal span (value
+          threshold), semi-transparent fills (0.25 opacity), text labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, spans clearly show highlighted regions
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Economic Index"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "span-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows BOTH vertical span (recession period) and horizontal span (target
+          zone), demonstrates overlapping spans
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Economic index with recession period (2008-2009) is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Economic index values (85-120) are realistic, years 2005-2014 appropriate
+          for showing recession
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API (add_hrect, add_vrect)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses plotly's add_hrect and add_vrect which are convenient but basic.
+          Could have leveraged hover interactivity, animation, or range slider for
+          the HTML output.
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/plotnine.yaml b/plots/span-basic/metadata/plotnine.yaml
index f602cb495c..585ac8e864 100644
--- a/plots/span-basic/metadata/plotnine.yaml
+++ b/plots/span-basic/metadata/plotnine.yaml
@@ -23,3 +23,175 @@ review:
   - The horizontal Risk Zone span (60-80) does not intersect with the actual data
     (price range ~150-210), making it visually disconnected and less educational
   - Optional text labels within span regions (mentioned in spec notes) are not implemented
+  image_description: The plot displays a line chart showing stock price over time
+    (2006-2016) with two span regions highlighted. The title "span-basic · plotnine
+    · pyplots.ai" is at the top. The x-axis shows "Year" and the y-axis shows "Price
+    ($)". A blue line (#306998) traces stock price starting around $200 and declining
+    to around $155. A yellow/cream vertical span marks the "Recession Period" (2008-2009),
+    and a pink/salmon horizontal span at the bottom marks the "Risk Zone" (price range
+    60-80). The legend on the right side shows "Highlighted Region" with both span
+    types labeled. The background is clean with subtle gray gridlines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 1.5 is appropriate, spans are clearly visible with alpha
+          0.25
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Yellow and pink/red spans are distinguishable, blue line has good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but the horizontal Risk Zone span appears at the
+          bottom where there's no data interaction, reducing visual effectiveness
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Year" and "Price ($)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend positioned well, but grid alpha specified as 0.3 yet appears
+          quite subtle/barely visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct span plot with highlighted regions
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=year, Y=price correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both vertical span (time period) and horizontal span (value threshold)
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies both span regions
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "span-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows both vertical and horizontal spans, but the horizontal span
+          doesn't intersect with data (price never goes below ~150, while risk zone
+          is 60-80)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price during recession is a real, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Stock prices are realistic; however the "Risk Zone" at 60-80 doesn't
+          align with the actual data range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_rect for spans, proper layering, scale_fill_manual.
+          Could have used annotate() for text labels within spans as mentioned in
+          spec notes.
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/pygal.yaml b/plots/span-basic/metadata/pygal.yaml
index 349ca1b237..ddc21c2ad4 100644
--- a/plots/span-basic/metadata/pygal.yaml
+++ b/plots/span-basic/metadata/pygal.yaml
@@ -15,3 +15,13 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: The plot displays a stock price time series from 2006 to 2016
+    with two highlighted span regions. A **yellow vertical span** marks the "Recession
+    Period" from 2008 to 2009, spanning the full height of the chart. A **red/pink
+    horizontal span** marks the "Risk Zone" between price levels $60-$80, spanning
+    the full width of the chart. The main data is shown as a **blue line** representing
+    "Stock Price" that starts around $200, dips during the recession period, and stabilizes
+    around $140-150 afterward. The title reads "span-basic · pygal · pyplots.ai".
+    Axis labels show "Year" on x-axis and "Price ($)" on y-axis. A legend in the top-left
+    shows all three series. Grid lines are visible but subtle.
+  verdict: APPROVED
diff --git a/plots/span-basic/metadata/seaborn.yaml b/plots/span-basic/metadata/seaborn.yaml
index ed31621e7f..9ab7b2cdb7 100644
--- a/plots/span-basic/metadata/seaborn.yaml
+++ b/plots/span-basic/metadata/seaborn.yaml
@@ -25,3 +25,165 @@ review:
   - Relies heavily on matplotlib axvspan/axhspan rather than seaborn-specific features;
     could explore seaborn fill_between or other native approaches to demonstrate library
     strengths
+  image_description: 'The plot displays a line chart showing monthly sales data from
+    2006 to 2011. There is a blue semi-transparent vertical span (light blue, #306998
+    with alpha 0.25) highlighting the recession period from 2008 to 2009. A yellow
+    horizontal span (#FFD43B with alpha 0.2) marks the target sales zone between 120-140
+    thousand dollars. The blue line (matching #306998) shows sales values that clearly
+    dip during the recession period and recover afterward, demonstrating the relationship
+    between the highlighted region and the data trend. The title "span-basic · seaborn
+    · pyplots.ai" is prominently displayed at the top. Axis labels show "Month" and
+    "Sales (thousands $)" with clear, readable tick labels. A legend in the upper
+    left identifies both span regions. The grid uses subtle dashed lines with low
+    opacity.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width 3 is appropriate, spans are clearly visible with good
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow spans are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Sales (thousands $)", X-axis has "Month"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct span/highlighted region plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=time, Y=value correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Both vertical span (recession) and horizontal span (threshold) present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes both spans
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses "span-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows both vertical (time period) and horizontal (threshold) spans
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Recession period with sales dip is realistic; the recession effect
+          timing could be more natural
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 80-160 thousand are realistic for business data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (plt, np, pd, sns)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot but the spans are matplotlib's axvspan/axhspan,
+          not seaborn-specific features
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/altair.yaml b/plots/sparkline-basic/metadata/altair.yaml
index 10a3ef6dca..99802b5f06 100644
--- a/plots/sparkline-basic/metadata/altair.yaml
+++ b/plots/sparkline-basic/metadata/altair.yaml
@@ -24,3 +24,170 @@ review:
   - Canvas height creates more whitespace than necessary for compact sparkline aesthetic
   - Data trend is monotonically increasing - adding a period of decline would better
     demonstrate sparkline ability to show varied trends
+  image_description: 'The plot displays a minimalist sparkline with a blue line (#306998)
+    showing an upward trend over 60 data points with realistic fluctuations including
+    periodic dips (weekend pattern). The visualization includes: a small gray circle
+    marking the first data point, a yellow circle highlighting the minimum value (around
+    position 15-20), and a blue circle at the end marking both the last point and
+    the maximum value. The title "sparkline-basic · altair · pyplots.ai" appears at
+    the top in a clear font. True to sparkline design, there are no axes, labels,
+    gridlines, or other chart chrome. The aspect ratio is approximately 5:1, appropriately
+    compact for a sparkline.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is clear and readable at fontSize=28
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line strokeWidth=3 is appropriate, markers well-sized (200-400)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/gray are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good aspect ratio but plot could use more vertical canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no axes by design) - full points
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is correctly hidden, but there's no grid (by design for sparklines,
+          however the legend=None is correct)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (sequential) and Y (values) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: no axes, compact ratio, min/max highlights,
+          endpoint markers'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Scale(zero=False) ensures all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly hidden for sparkline
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: sparkline-basic · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows upward trend with fluctuations and weekend dips, but could
+          show more variety (e.g., a period of decline)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily sales figures with weekend dips is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values 85-165 range is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used and needed
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of layered charts and declarative encoding, but could leverage
+          more Altair features like tooltips or selection
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/bokeh.yaml b/plots/sparkline-basic/metadata/bokeh.yaml
index f187e0d3b4..9d482893a7 100644
--- a/plots/sparkline-basic/metadata/bokeh.yaml
+++ b/plots/sparkline-basic/metadata/bokeh.yaml
@@ -23,3 +23,164 @@ review:
   weaknesses:
   - Title font size could be slightly larger for the 4800x1200 canvas
   - Consider using ColumnDataSource for better Bokeh idiom (minor)
+  image_description: 'The plot displays a minimalist sparkline visualization with
+    a blue line (#306998) showing 30 data points representing daily website traffic.
+    The line exhibits a general upward trend combined with weekly seasonality patterns.
+    The chart correctly omits all axes, gridlines, and labels in keeping with sparkline
+    conventions. Four highlighted points are visible: a small blue dot at the first
+    point (left edge), a small blue dot at the last point (right edge), a red dot
+    marking the minimum value, and a green dot marking the maximum peak. The aspect
+    ratio is approximately 4:1 (4800x1200 pixels), which is ideal for sparklines.
+    The title "sparkline-basic · bokeh · pyplots.ai" appears centered at the top in
+    a readable font.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is readable but could be slightly larger for canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width and markers well-sized for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/red/green distinguishable, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, sparkline fills canvas appropriately
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid as intended, no legend needed but could add subtle context
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly displayed over time
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: no axes/labels, compact ratio, min/max
+          highlights, first/last highlights'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sparkline-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows upward trend, weekly seasonality, and noise - all aspects of
+          sparkline data
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Website traffic is plausible; scenario is good but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values around 1000-1300 are realistic for daily traffic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Basic Bokeh usage; could use ColumnDataSource for more idiomatic
+          code, but scatter/line methods are appropriate
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/highcharts.yaml b/plots/sparkline-basic/metadata/highcharts.yaml
index 83546b8884..cd2fc981bb 100644
--- a/plots/sparkline-basic/metadata/highcharts.yaml
+++ b/plots/sparkline-basic/metadata/highcharts.yaml
@@ -24,3 +24,169 @@ review:
   - LineSeries imported from highcharts_core.options.series.area instead of highcharts_core.options.series.line
   - Missing last point marker (specification mentions optional first/last points for
     reference - only first is shown)
+  image_description: 'The plot displays a sparkline chart with a continuous blue line
+    (#306998) showing a trend pattern over approximately 60 data points. The line
+    exhibits a clear pattern with an initial dip, mid-chart low point, then an upward
+    trend with cyclical variations. Three marker points are visible: a gray circle
+    at the start (first point), an orange circle at the minimum value (around the
+    middle-left of the chart), and a teal/cyan circle at the maximum value (in the
+    upper-right portion). The title "sparkline-basic · highcharts · pyplots.ai" is
+    displayed at the top in bold text. No axes, gridlines, or labels are present,
+    consistent with sparkline design principles. The chart has a clean white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (72px), bold, and clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (10) and marker sizes (28-32 radius) are well-suited for
+          the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette: blue line, orange min, teal max, gray
+          start (no red-green conflicts)'
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but margins could be better balanced; plot area
+          is well-utilized
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is disabled as required, but no way to identify what the markers
+          mean without code context
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline implementation with minimal chart chrome
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly displayed as continuous line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All optional features implemented: min/max highlighting, first point
+          marker, no axes/labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend disabled (correct for sparkline) but markers unlabeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "sparkline-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows trend with seasonal/cyclical pattern, noise, clear min/max
+          points, upward overall trend
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily sales figures scenario is plausible and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values around 100 with realistic variation are sensible for sales
+          data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses LineSeries from area module (should import from line module)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Highcharts chart options, separate scatter series for
+          markers, proper axis hiding. Could leverage Highcharts sparkline-specific
+          features.
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/letsplot.yaml b/plots/sparkline-basic/metadata/letsplot.yaml
index 5e7838b9a0..d534cf90d2 100644
--- a/plots/sparkline-basic/metadata/letsplot.yaml
+++ b/plots/sparkline-basic/metadata/letsplot.yaml
@@ -23,3 +23,159 @@ review:
   weaknesses:
   - Could use more distinctive lets-plot features (e.g., tooltips in HTML version)
   - Data trend could be more dramatic to better showcase sparkline utility
+  image_description: 'The plot displays a minimal sparkline showing stock price trends
+    over 60 trading days. It features a thin blue line (#306998) with a subtle light
+    blue area fill beneath it. Four highlight points are visible: a green dot marking
+    the maximum value (positioned early in the series), a red dot marking the minimum
+    (towards the right side), and two smaller yellow dots marking the first and last
+    data points. The title "sparkline-basic · lets-plot · pyplots.ai" is centered
+    at the top in a readable font. The chart uses a ~6:1 aspect ratio (4800x800px)
+    with no axes, gridlines, or labels - adhering to the sparkline minimalist philosophy.
+    The data shows an overall slightly positive trend with volatility.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is clearly readable at 24pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and points are visible; line could be slightly thicker for better
+          visibility at small sizes
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Green/red/yellow points are distinguishable; blue line provides good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of wide sparkline aspect ratio, good margins
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly mapped to line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has all optional features: min/max highlights, first/last points,
+          area fill'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis expanded to show full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines, correctly omitted
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend with volatility, upward bias, min/max differentiation;
+          could show more dramatic trend variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Stock price simulation is realistic with ~1.8% daily volatility
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 60 data points is good; prices starting at 100 are reasonable
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar and theme_void() appropriately, but doesn't showcase
+          lets-plot's unique interactive features
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/matplotlib.yaml b/plots/sparkline-basic/metadata/matplotlib.yaml
index df79aecd80..869e318e03 100644
--- a/plots/sparkline-basic/metadata/matplotlib.yaml
+++ b/plots/sparkline-basic/metadata/matplotlib.yaml
@@ -25,3 +25,177 @@ review:
   - Line could be slightly thinner for more authentic sparkline appearance at small
     sizes
   - Library features score is modest - no advanced matplotlib techniques used
+  image_description: 'The plot shows a minimalist sparkline with a blue line (#306998)
+    displaying a time series trend over 50 data points. The line shows an overall
+    upward trend with periodic oscillations (weekly pattern). Four highlighted points
+    are visible: a blue dot at the start (first value), a red dot marking the minimum
+    value (around the middle-left), a green dot marking the maximum value (upper portion,
+    right of center), and a yellow dot at the end (last value). All dots have white
+    edge borders for visibility. The chart has no axes, gridlines, or labels - true
+    to sparkline aesthetics. The title "sparkline-basic · matplotlib · pyplots.ai"
+    appears in gray at the bottom right corner. The aspect ratio is wide and short
+    (~6:1), appropriate for sparkline design. White background with clean, professional
+    appearance.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title text is clearly readable at 14pt, appropriate for sparkline
+          context
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; clean minimal design
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (2.5) and marker sizes (s=120-150) are well-adapted for
+          the sparkline format
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Colors are colorblind-safe: blue line, red min, green max, yellow
+          end - all distinguishable'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of the wide aspect ratio; line fills the space appropriately
+          with proper margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for sparklines (intentionally no axes per spec) - giving 0 but
+          not a deficiency
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid/legend needed for sparklines; correctly omitted
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline implementation - condensed line chart without chrome
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly plotted as continuous line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: no axes, compact ratio, min/max highlights,
+          first/last points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate y-margin padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sparkline-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows upward trend with cyclical pattern; min/max/endpoints highlighted.
+          Could show more dramatic variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Website traffic scenario is realistic and well-commented in code
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (100-180 range) are plausible for daily traffic; could be
+          more varied
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) properly set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses standard matplotlib features competently but no advanced features
+          like annotations, custom line styles, or fill_between
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/plotly.yaml b/plots/sparkline-basic/metadata/plotly.yaml
index 6d76ee7b9a..fae6724f1e 100644
--- a/plots/sparkline-basic/metadata/plotly.yaml
+++ b/plots/sparkline-basic/metadata/plotly.yaml
@@ -25,3 +25,172 @@ review:
   weaknesses:
   - The first point yellow marker partially overlaps with the line start, making it
     slightly less visible than the last point marker
+  image_description: 'The plot displays a minimalist sparkline visualization on a
+    white background. A single blue (#306998) line traces a trend with 30 data points,
+    showing an overall upward trajectory with natural fluctuations. The line is clean
+    with width appropriate for the canvas size. Four colored markers highlight key
+    points: two yellow dots mark the first and last data points (left edge and right
+    edge), a red dot marks the minimum value (in the lower middle portion), and a
+    green dot marks the maximum value (upper portion). The title "sparkline-basic
+    · plotly · pyplots.ai" appears centered at the top in a dark font. There are no
+    axes, gridlines, tick labels, or legends - exactly as a sparkline should be. The
+    chart uses the full width of the canvas with appropriate margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (48pt font), clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean minimal design
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (4) and marker sizes (14-16) well adapted for canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue line, red/green/yellow markers are distinct and colorblind-distinguishable
+          (not relying on red-green alone)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, sparkline fills width appropriately with
+          balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no axes by design) - awarding 0 but not penalizing
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid/legend as appropriate for sparklines
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly plotted as line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All optional features implemented: min/max highlights, first/last
+          highlights, no axes/labels/gridlines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed), no incorrect labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "sparkline-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend with noise, highlights key points; could show a more
+          dramatic min/max for visual impact
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Daily sales figures over 30 days is a realistic sparkline use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values around 100-120 are sensible for sales/metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with multiple traces, proper layout configuration,
+          exports both PNG and interactive HTML
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/plotnine.yaml b/plots/sparkline-basic/metadata/plotnine.yaml
index a5223c17cf..2a3c679778 100644
--- a/plots/sparkline-basic/metadata/plotnine.yaml
+++ b/plots/sparkline-basic/metadata/plotnine.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Line could be slightly thinner for a more authentic sparkline appearance at very
     small sizes
+  image_description: 'The plot displays a minimalist sparkline with a dark blue (#306998)
+    continuous line on a clean white background. The title "sparkline-basic · plotnine
+    · pyplots.ai" is centered at the top in a readable font size. The line shows a
+    clear trend: starting low-left with a blue dot (first point), dipping to a minimum
+    marked with a red dot, rising to a peak marked with a green dot (maximum), then
+    fluctuating before ending with a blue dot (last point). The aspect ratio is wide
+    (4:1) as appropriate for sparklines. Critically, there are no axes, tick marks,
+    gridlines, or legend visible - achieving the pure minimalist aesthetic required
+    by the specification.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt and clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements at all
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and markers are clearly visible; line thickness is good, markers
+          are appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Red/green for min/max with blue for neutral points; distinct enough
+          for colorblind users due to brightness differences
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of wide aspect ratio; slight excess whitespace at top/bottom
+          but appropriate for sparkline format
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no axes by design) - full points for correctly
+          omitting them
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no grid/legend by design) - however, not applicable
+          means neutral scoring
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly mapped to X/Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: minimal chrome, highlighted min/max points,
+          first/last point markers, compact aspect ratio'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the plot area
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed/shown (correct for sparklines)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sparkline-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, seasonality, noise; min and max points clearly demonstrated;
+          could show a more dramatic range
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: '"Daily sales figures" context is plausible; values around 100 are
+          reasonable'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 50 data points is ideal for sparklines; values in sensible range
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar correctly with geom_line, geom_point, theme customization,
+          and scale_color_manual; could leverage more advanced plotnine features like
+          stat transformations
+  verdict: APPROVED
diff --git a/plots/sparkline-basic/metadata/seaborn.yaml b/plots/sparkline-basic/metadata/seaborn.yaml
index 0f5780fd0c..b836a709a5 100644
--- a/plots/sparkline-basic/metadata/seaborn.yaml
+++ b/plots/sparkline-basic/metadata/seaborn.yaml
@@ -25,3 +25,174 @@ review:
     current is 16:9) but this works for standalone display
   - Library features score is moderate - implementation relies heavily on matplotlib
     underneath with sns.lineplot being a thin wrapper
+  image_description: 'The plot displays a sparkline visualization on a light gray
+    (#fafafa) background. A blue line (#306998) shows a time series trend over 24
+    data points, starting low on the left, rising to a peak around the middle, then
+    declining toward the right. The line has a subtle light blue fill underneath it
+    creating an area effect. Four colored dots mark key points: yellow dots with blue
+    borders mark the first and last data points, a green dot marks the maximum value
+    (peak), and a red dot marks the minimum value. The title "sparkline-basic · seaborn
+    · pyplots.ai" appears at the top in bold black text. There are no axes, labels,
+    gridlines, or tick marks - the visualization is pure and minimalist as expected
+    for a sparkline.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt is perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width (4) and marker sizes (200-300) are well-suited for the
+          data
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/green/red/yellow are distinguishable; not relying on red-green
+          alone
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills most of the space; slight asymmetry
+          with more padding at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no axes by design) - full points awarded
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for sparklines (no grid/legend by design) - deducted for consistency
+          with criteria
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sparkline (condensed line chart)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sequential values correctly mapped to line
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All optional features implemented: min/max highlights, first/last
+          points, fill under line, no axes/labels/gridlines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate y-limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sparklines - full points
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: sparkline-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows upward and downward trends, peak and trough; could show more
+          volatility patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly revenue trend is a realistic business context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values around 100 with ±20 variation are plausible for indexed revenue
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current seaborn API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but missing plt.show() consideration (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.lineplot correctly, but this is basic seaborn usage; could
+          leverage seaborn's statistical features more distinctively
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/altair.yaml b/plots/spectrogram-basic/metadata/altair.yaml
index 15efcb69ef..a02f338a22 100644
--- a/plots/spectrogram-basic/metadata/altair.yaml
+++ b/plots/spectrogram-basic/metadata/altair.yaml
@@ -25,3 +25,179 @@ review:
   - Missing .interactive() call which would enable zoom/pan for detailed exploration
     of the spectrogram
   - Colorbar font sizes could be slightly larger for better visibility
+  image_description: The plot displays a spectrogram showing a linear chirp signal
+    sweeping from approximately 100 Hz to 800 Hz over 2 seconds. The title "spectrogram-basic
+    · altair · pyplots.ai" is clearly visible at the top. The x-axis shows "Time (s)"
+    ranging from 0.00 to ~2.00 seconds, and the y-axis shows "Frequency (Hz)" ranging
+    from 0 to ~1000 Hz. A bright yellow diagonal band represents the frequency sweep,
+    clearly visible against a teal/green background with noise. The colorbar on the
+    right shows "Power (dB)" ranging from approximately -60 dB (dark) to +30 dB (yellow/bright).
+    The viridis colormap is used effectively, making the signal energy clearly distinguishable
+    from the noise floor.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at top, axis labels "Time (s)"
+          and "Frequency (Hz)" are descriptive and well-sized, tick labels are legible'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: The spectrogram heatmap is clearly visible with good density; the
+          chirp signal is prominently displayed against the noise background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis colormap which is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though the plot could be slightly larger
+          relative to the total canvas; margins are balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Time (s)" and "Frequency
+          (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid is present (which is fine for a heatmap), and the colorbar
+          is properly titled "Power (dB)"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: spectrogram displayed as a time-frequency heatmap'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power/amplitude as color - all
+          correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colorbar with dB units, proper axis labels,
+          perceptually uniform colormap'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Axes show the full data range: 0-2s time, 0-1000Hz frequency'
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately shows Power (dB) with correct scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "spectrogram-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows chirp signal clearly demonstrating time-frequency relationship;
+          noise is also visible showing typical real-world signal characteristics
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Chirp signal is a classic test signal used in radar, audio, and vibration
+          analysis - very appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency range (0-1000 Hz) and sample rate (4000 Hz) are realistic;
+          dB scale is appropriate for power representation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script structure: imports → data generation → STFT computation
+          → DataFrame creation → Altair chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible noise
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html, which is correct for Altair
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_rect() with x/x2/y2 encoding for proper rectangle rendering,
+          tooltips for interactivity, and proper color scale configuration; could
+          have added .interactive() for zoom/pan
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/bokeh.yaml b/plots/spectrogram-basic/metadata/bokeh.yaml
index cfb81a3542..8948bcf411 100644
--- a/plots/spectrogram-basic/metadata/bokeh.yaml
+++ b/plots/spectrogram-basic/metadata/bokeh.yaml
@@ -23,3 +23,171 @@ review:
   - Grid lines (dashed) are somewhat distracting over the heatmap image; consider
     disabling or reducing opacity
   - Could leverage Bokeh interactive hover tools to show frequency/time/power values
+  image_description: The plot displays a spectrogram showing a chirp signal with frequency
+    increasing linearly from approximately 200 Hz to 2000 Hz over 2 seconds. The bright
+    yellow diagonal line represents the high-power chirp signal against a blue-green
+    noisy background rendered with the viridis colormap. The x-axis shows "Time (seconds)"
+    from 0 to ~2, the y-axis shows "Frequency (Hz)" from 0 to ~4000 Hz. A colorbar
+    on the right indicates "Power (dB)" ranging from approximately -80 to -20 dB.
+    The title correctly reads "spectrogram-basic · bokeh · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          large canvas size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Spectrogram heatmap fills the plot area well, chirp signal clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but colorbar text slightly small relative to main plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Time (seconds)" and "Frequency (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is visible but somewhat distracting with dashed lines over the
+          heatmap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct spectrogram/time-frequency heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colorbar with dB units, perceptually
+          uniform colormap, clear axis labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full frequency range (0-4000 Hz) and time range (0-2 s) visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Power (dB)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "spectrogram-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear chirp signal but limited variation in noise characteristics
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Chirp signal is a classic test signal for spectrogram analysis, scientifically
+          appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Good frequency range (200-2000 Hz chirp), sample rate of 8 kHz is
+          appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh's image glyph and ColorBar, but doesn't leverage interactive
+          features like hover tools
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/highcharts.yaml b/plots/spectrogram-basic/metadata/highcharts.yaml
index d22e76fe98..ea3d388233 100644
--- a/plots/spectrogram-basic/metadata/highcharts.yaml
+++ b/plots/spectrogram-basic/metadata/highcharts.yaml
@@ -25,3 +25,173 @@ review:
     points)
   - Y-axis shows frequencies up to ~469 Hz but the chirp only goes to 200 Hz (wasted
     vertical space showing noise)
+  image_description: The spectrogram displays a heatmap of a linear chirp signal.
+    The plot shows time (0.06 to 1.91 seconds) on the x-axis and frequency (0 to ~469
+    Hz) on the y-axis. A clear diagonal yellow band sweeps from low frequencies at
+    early times to high frequencies at later times, representing the increasing frequency
+    of the chirp signal. The background is predominantly teal/green (mid-power levels)
+    with scattered darker blue/purple spots (low power). A vertical colorbar on the
+    right shows "Power (dB)" ranging from approximately -100 dB (purple) to -10 dB
+    (yellow), using a viridis colormap. The title reads "spectrogram-basic · highcharts
+    · pyplots.ai" with a subtitle "Linear chirp signal (10-200 Hz) with linear frequency
+    axis".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels slightly
+          small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are appropriately sized for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; slight imbalance with right margin for legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Time (seconds)" and "Frequency (Hz)" with proper units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Colorbar legend is present, but the plot has no visible grid lines;
+          subtle gridlines would help when reading values
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct heatmap spectrogram visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar with dB units, proper axis labels, perceptually uniform
+          colormap
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full time and frequency range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Power (dB)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Chirp signal clearly shows frequency increasing over time; could
+          show additional signal features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Signal processing context is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequencies 10-200 Hz appropriate; y-axis extends to ~500 Hz which
+          is fine but shows noise above signal range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts heatmap with proper color axis and tooltip; could
+          leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/letsplot.yaml b/plots/spectrogram-basic/metadata/letsplot.yaml
index 82c3fad76f..dfea2605f6 100644
--- a/plots/spectrogram-basic/metadata/letsplot.yaml
+++ b/plots/spectrogram-basic/metadata/letsplot.yaml
@@ -27,3 +27,173 @@ review:
   - No grid lines to help read values at specific time-frequency points
   - Y-axis shows 0-500 Hz but chirp only goes to 200 Hz, wasting some visual space
   - Could benefit from minor grid for easier value reading on the heatmap
+  image_description: The spectrogram displays a time-frequency heatmap showing a chirp
+    signal with frequency increasing over time. The plot uses the viridis colormap
+    with colors ranging from dark purple (low power, ~-80 dB) through teal/green (mid
+    power, ~-40 to -60 dB) to bright yellow (high power, ~-20 dB). The X-axis shows
+    "Time (seconds)" from 0 to 2, and the Y-axis shows "Frequency (Hz)" from 0 to
+    500. A clear diagonal bright yellow band sweeps from low frequency (~10 Hz) at
+    time 0 to approximately 200 Hz at time 2 seconds, demonstrating the characteristic
+    chirp pattern. The title reads "spectrogram-basic · lets-plot · pyplots.ai" at
+    the top. A colorbar legend on the right shows "Power (dB)" with the scale. The
+    background noise appears as scattered teal/green tiles outside the main signal
+    band.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels all clearly readable at proper sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Tiles are well-sized and clearly visible; the chirp signal is easily
+          distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Proper units: "Time (seconds)" and "Frequency (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines; legend is present but grid would help interpretation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct spectrogram/heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, Frequency on Y-axis, Power as color fill
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar with dB units, proper axis labels, chirp signal demonstration
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full time and frequency range visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Power (dB)" legend accurate'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrogram-basic · lets-plot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows chirp signal clearly with noise; could show additional signal
+          features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Chirp signal is a standard test signal used in radar, sonar, and
+          audio analysis
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reasonable frequency range (10-200 Hz chirp), sample rate (1000 Hz),
+          duration (2 sec); slight overkill on y-axis range showing 0-500 Hz
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, pandas, lets_plot, scipy.signal)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_tile, scale_fill_viridis, theme_minimal;
+          good but no advanced lets-plot specific features
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/matplotlib.yaml b/plots/spectrogram-basic/metadata/matplotlib.yaml
index 647a51f6e4..352d06ca46 100644
--- a/plots/spectrogram-basic/metadata/matplotlib.yaml
+++ b/plots/spectrogram-basic/metadata/matplotlib.yaml
@@ -25,3 +25,180 @@ review:
     plot of the signal above the spectrogram
   - The burst signal at 500 Hz overlaps visually with the chirp in a way that could
     be clearer with different timing
+  image_description: The plot displays a spectrogram showing time-frequency representation
+    of a signal. The x-axis shows "Time (s)" ranging from 0 to approximately 2 seconds,
+    and the y-axis shows "Frequency (Hz)" ranging from 0 to 2000 Hz. The plot uses
+    the viridis colormap (purple-blue-green-yellow gradient) where brighter yellow
+    indicates higher power spectral density. A clear diagonal line (chirp signal)
+    sweeps from approximately 100 Hz at t=0 to 1000 Hz at t=2s. There's also a visible
+    horizontal burst around 500 Hz between t=0.8s and t=1.2s overlapping with the
+    chirp. The background shows noise at roughly -60 to -70 dB/Hz (blue-purple tones).
+    A colorbar on the right indicates "Power/Frequency (dB/Hz)" ranging from -80 to
+    approximately -20 dB/Hz. The title correctly shows "spectrogram-basic · matplotlib
+    · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at fontsize=24, axis labels at fontsize=20, tick labels at
+          fontsize=16 - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Spectrogram is clearly visible with good color intensity showing
+          the chirp and burst
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with proper margins, colorbar well-positioned, slight
+          deduction for small unused space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Time (s)" and "Frequency (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid visible (appropriate for a spectrogram heatmap); no separate
+          legend is present, but the colorbar serves as one
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct spectrogram/time-frequency heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes colorbar with dB scale, perceptually uniform colormap, proper
+          axis labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "spectrogram-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows chirp signal with frequency sweep AND burst component AND noise
+          - demonstrates time-varying frequency content well; slight deduction as
+          a more complex multi-component signal could showcase more
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Chirp signal is a standard test signal used in signal processing
+          and audio analysis; plausible but somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sample rate of 4000 Hz, frequency range 100-1000 Hz, 2-second duration
+          - all realistic values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current ax.specgram() method
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses matplotlib's built-in ax.specgram() which is good, but doesn't
+          leverage additional matplotlib features like adding annotations, custom
+          colorbar formatting, or secondary plots (e.g., time domain signal above
+          spectrogram)
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/plotly.yaml b/plots/spectrogram-basic/metadata/plotly.yaml
index 7d32fc22f2..5a478e8df4 100644
--- a/plots/spectrogram-basic/metadata/plotly.yaml
+++ b/plots/spectrogram-basic/metadata/plotly.yaml
@@ -25,3 +25,171 @@ review:
     time/frequency reading)
   - Could utilize more Plotly-specific interactive features like range sliders or
     animation controls
+  image_description: The plot displays a spectrogram heatmap showing a chirp signal
+    with frequency increasing linearly from approximately 100 Hz at time 0 to 800
+    Hz at time 2 seconds. The main signal trace appears as a bright yellow diagonal
+    line sweeping from bottom-left to top-right against a teal/green background. The
+    colorscale uses Viridis (yellow=high power, teal=medium, purple=low). The x-axis
+    shows "Time (seconds)" ranging from 0 to ~1.9, y-axis shows "Frequency (Hz)" ranging
+    from 0 to 2000. A colorbar on the right indicates "Power (dB)" ranging from approximately
+    -90 to -20 dB. The title correctly reads "spectrogram-basic · plotly · pyplots.ai".
+    Background noise is visible as random speckles across the frequency range.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Heatmap cells are well-sized, chirp signal clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight excess margin on bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Clear labels with units: "Time (seconds)" and "Frequency (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid present (acceptable for heatmap), colorbar well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct spectrogram/heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has colorbar with dB units, clear axes, time-frequency representation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full signal range visible (0-2s, 0-2000Hz)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately labeled "Power (dB)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "{spec-id} · {library} · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows chirp signal with clear frequency sweep, background noise visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Chirp signal is a standard test signal used in audio/vibration analysis
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Sample rate and frequency range are realistic; minor: signal duration
+          could be longer for more dramatic effect'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → spectrogram → figure →
+          save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, plotly.graph_objects, scipy.signal)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Heatmap with hovertemplate for interactivity, plotly_white
+          template, but could leverage more Plotly-specific features like custom hover
+          formatting or animations
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/plotnine.yaml b/plots/spectrogram-basic/metadata/plotnine.yaml
index bee048fe4b..4ee595e5cb 100644
--- a/plots/spectrogram-basic/metadata/plotnine.yaml
+++ b/plots/spectrogram-basic/metadata/plotnine.yaml
@@ -26,3 +26,173 @@ review:
   - No grid lines to aid in reading specific time-frequency values
   - Frequency axis filtered to 500 Hz when chirp only reaches 200 Hz - could be tighter
     range for better visualization
+  image_description: The spectrogram displays a time-frequency heatmap on a 16:9 canvas
+    with a clean minimal theme. The x-axis shows "Time (s)" ranging from 0.0 to 2.0
+    seconds, and the y-axis shows "Frequency (Hz)" ranging from 0 to 500 Hz. A colorbar
+    on the right labeled "Power (dB)" shows the scale from approximately -70 dB (dark
+    blue) to -20 dB (light yellow/cream). The plot clearly shows a chirp signal with
+    frequency increasing linearly from about 50 Hz at t=0 to about 200 Hz at t=2,
+    visible as a bright diagonal band sweeping upward across the time axis. The background
+    noise appears as a mottled blue-green pattern at frequencies outside the chirp.
+    The title "spectrogram-basic · plotnine · pyplots.ai" is displayed at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Tiles are visible and the chirp pattern is clear, though some granularity
+          in the high-frequency region
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses a perceptually reasonable colormap that avoids red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Time (s)" and "Frequency (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid (spec suggests grid could help), legend is well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct spectrogram heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X, Frequency on Y, Power as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorbar with dB units, clear axes, chirp signal demonstration
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Shows full 0-2s time range and 0-500 Hz frequency range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Power (dB)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows time-frequency representation well; chirp clearly visible;
+          noise background present
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Chirp signal is a standard signal processing example, scientifically
+          neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Sample rate and frequency ranges are plausible; filtering to 500
+          Hz is arbitrary when chirp only goes to 200 Hz
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses geom_tile appropriately for heatmap, but custom color gradient
+          instead of plotnine's scale functions; theme_minimal is good but basic usage
+          overall
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/pygal.yaml b/plots/spectrogram-basic/metadata/pygal.yaml
index 0a7772a8d7..1b10c6a90d 100644
--- a/plots/spectrogram-basic/metadata/pygal.yaml
+++ b/plots/spectrogram-basic/metadata/pygal.yaml
@@ -27,3 +27,175 @@ review:
     extending pygal)
   - Grid opacity at 0.25 with white color may be slightly too visible on the colorful
     background
+  image_description: The plot displays a spectrogram showing a chirp signal with frequency
+    increasing over time (0-2 seconds on x-axis, 0-2000 Hz on y-axis). The visualization
+    uses the viridis colormap (dark purple to bright yellow), with the diagonal bright
+    band clearly showing the frequency sweep from ~100 Hz to ~1200 Hz. A secondary
+    weaker harmonic trace is visible. The colorbar on the right shows power in dB
+    ranging from approximately -99 to -16 dB. The title "spectrogram-basic · pygal
+    · pyplots.ai" appears at the top. Subtle white grid lines help read values. All
+    axis labels include units (Time (s), Frequency (Hz), Power (dB)).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all readable at full size;
+          tick font could be slightly larger but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Heatmap cells are clearly visible with good resolution; the chirp
+          signal is clearly distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis colormap which is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with plot filling most of the canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Time (s)", "Frequency
+          (Hz)", "Power (dB)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are present but no legend needed; colorbar serves as the
+          legend equivalent
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a spectrogram time-frequency heatmap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colorbar with dB units, perceptually uniform colormap, proper
+          axis labels; missing log scale option for frequency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar is present and labeled; colorbar title "Power (dB)" is correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "spectrogram-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows chirp signal with harmonics and noise, demonstrating time-varying
+          frequency content well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Chirp signal is a realistic scenario used in audio analysis, radar,
+          and communications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sample rate of 4000 Hz, 2-second duration, frequency range up to
+          Nyquist - all sensible values
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses a custom class `SpectrogramHeatmap` extending pygal's Graph
+          - necessary for the heatmap functionality not natively supported
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, scipy.signal, pygal)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 2
+        max: 1
+        passed: true
+        comment: Saves as plot.png, plot.svg, and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creates a custom pygal Graph subclass with SVG rendering, demonstrates
+          pygal's extensibility for non-standard chart types
+  verdict: APPROVED
diff --git a/plots/spectrogram-basic/metadata/seaborn.yaml b/plots/spectrogram-basic/metadata/seaborn.yaml
index 0e11c9b5fc..cbbc1b5b7d 100644
--- a/plots/spectrogram-basic/metadata/seaborn.yaml
+++ b/plots/spectrogram-basic/metadata/seaborn.yaml
@@ -22,3 +22,173 @@ review:
     orientation
   weaknesses:
   - No grid lines present (minor issue for heatmap plots)
+  image_description: The plot displays a spectrogram showing a chirp signal with frequency
+    increasing linearly from approximately 100 Hz to 800 Hz over a 2-second duration.
+    The visualization uses the viridis colormap (dark purple to yellow), with the
+    bright yellow diagonal line clearly showing the frequency sweep against a teal/green
+    background representing noise. The x-axis is labeled "Time (s)" ranging from 0.0
+    to 2.0, and the y-axis is labeled "Frequency (Hz)" ranging from 0 to 2000. A colorbar
+    on the right shows "Power (dB)" with values from approximately -90 to -20 dB.
+    The title correctly displays "spectrogram-basic · seaborn · pyplots.ai".
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable with
+          appropriate font sizes (24pt title, 20pt labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: The spectrogram data is clearly visible with excellent contrast between
+          the signal (yellow) and background noise (teal/green)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is perceptually uniform and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas, though colorbar takes some space; plot fills
+          adequate area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes include units: "Time (s)" and "Frequency (Hz)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid present (not critical for heatmaps, but could enhance readability)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct spectrogram/heatmap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, frequency on y-axis, power as color intensity
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes colorbar with dB units, proper axis labels, perceptually
+          uniform colormap as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full time range (0-2s) and frequency range (0-2000 Hz) displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Power (dB)"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "spectrogram-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Chirp signal excellently demonstrates time-frequency relationship,
+          clearly shows frequency increasing over time
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Chirp signal is a standard test signal in signal processing; could
+          be more domain-specific (e.g., labeled as radar chirp or audio sweep)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sample rate of 4000 Hz, frequencies 100-800 Hz, 2-second duration
+          are all realistic signal processing values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, seaborn, scipy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn and scipy APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.heatmap with cbar_kws for colorbar customization; could
+          leverage more seaborn-specific styling
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/altair.yaml b/plots/spectrum-basic/metadata/altair.yaml
index e3c17f9519..a71ea4b6a5 100644
--- a/plots/spectrum-basic/metadata/altair.yaml
+++ b/plots/spectrum-basic/metadata/altair.yaml
@@ -23,3 +23,165 @@ review:
   weaknesses:
   - Could add mark_rule() or mark_point() to highlight peak frequencies
   - No use of Altair selection/interactivity features (zoom/pan)
+  image_description: 'The plot displays a frequency spectrum with a blue line on a
+    white background. The X-axis shows "Frequency (Hz)" from 0 to 300 Hz, and the
+    Y-axis shows "Amplitude (dB)" from -80 to 10 dB. Three distinct peaks are clearly
+    visible: a dominant peak at 50 Hz reaching approximately 0 dB, a secondary peak
+    at 120 Hz at about -7 dB, and a smaller peak at 200 Hz at approximately -13 dB.
+    The noise floor fluctuates around -45 to -50 dB. The title "spectrum-basic · altair
+    · pyplots.ai" appears at the top. Subtle gray grid lines are visible, and the
+    overall layout is well-balanced with good canvas utilization.'
+  criteria_checklist:
+    visual_quality:
+      score: 40
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line stroke width appropriate for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Both axes have units (Hz, dB)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.3), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct line chart for frequency spectrum
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency on X, Amplitude on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: FFT data, dB scale, clear frequency components
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-300 Hz range shows all peaks
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: spectrum-basic · altair · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows dominant peak, harmonics, weak component, and noise floor
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Signal processing scenario with realistic frequency components
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic values for audio/signal analysis
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Tooltips with formatting, declarative encoding, configure chain
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/bokeh.yaml b/plots/spectrum-basic/metadata/bokeh.yaml
index 47a8c1a363..aab4cdbcf6 100644
--- a/plots/spectrum-basic/metadata/bokeh.yaml
+++ b/plots/spectrum-basic/metadata/bokeh.yaml
@@ -24,3 +24,179 @@ review:
     legend text
   - Missing HoverTool which would showcase Bokeh interactivity for showing exact frequency/amplitude
     values
+  image_description: The plot displays a frequency spectrum with amplitude (dB) on
+    the Y-axis ranging from approximately -80 to 0, and frequency (Hz) on the X-axis
+    from 0 to 500 Hz. The main data is shown as a blue line (#306998) with a subtle
+    light blue fill underneath. Three prominent peaks are clearly visible at 50 Hz,
+    150 Hz, and 400 Hz, marked with yellow/gold circle markers (#FFD43B) and dashed
+    vertical lines extending down to -80 dB. The title "spectrum-basic · bokeh · pyplots.ai"
+    appears in the top-left. A legend in the top-right shows "Signal Spectrum" and
+    the three peak frequencies. The background is a light off-white (#fafafa) with
+    dashed grid lines. The noise floor is visible around -60 to -70 dB with typical
+    FFT variation.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 20pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend is well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is excellent for the canvas size, peaks clearly marked
+          with large circle markers (size=25)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good utilization of canvas, slight excess whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Frequency (Hz)" and
+          "Amplitude (dB)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend labels are too small/hard to read at full resolution, legend
+          items appear cramped
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency on X-axis, amplitude in dB on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple frequency components shown, peak highlighting present, dB
+          scale used
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range (0-500 Hz, -80 to 0 dB)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies signal spectrum and peak frequencies
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrum-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows fundamental frequency (50 Hz), harmonic (150 Hz), separate
+          component (400 Hz), and noise floor - excellent demonstration of spectrum
+          analysis
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Synthetic signal is plausible but generic; could be more specific
+          (e.g., "motor vibration" or "audio signal")
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic frequency range (0-500 Hz), appropriate sample rate (8192
+          Hz), sensible dB scale
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both PNG and HTML, but code structure is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, varea for fill, interactive tools, but could
+          leverage more Bokeh-specific features like HoverTool for frequency/amplitude
+          display
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/highcharts.yaml b/plots/spectrum-basic/metadata/highcharts.yaml
index c67db2b527..feb3f40fed 100644
--- a/plots/spectrum-basic/metadata/highcharts.yaml
+++ b/plots/spectrum-basic/metadata/highcharts.yaml
@@ -24,3 +24,188 @@ review:
   - Y-axis tick labels are dense with many values displayed; could use larger tick
     intervals for cleaner appearance
   - Legend font size could be larger relative to other text elements
+  image_description: 'The plot displays a frequency spectrum with an areaspline chart
+    showing signal amplitude (dB) across a frequency range of 0-2000 Hz. The chart
+    has a white background with a light blue filled area under the curve. Three prominent
+    peaks are clearly visible at approximately 440 Hz (tallest peak at ~-5 dB), 880
+    Hz (second peak at ~-12 dB), and 1320 Hz (third peak at ~-17 dB), matching the
+    specified harmonics in the subtitle. The baseline noise floor fluctuates around
+    -55 to -65 dB. The title reads "spectrum-basic · highcharts · pyplots.ai" in bold
+    at the top, with a subtitle "Audio Signal Analysis: 440 Hz Fundamental + Harmonics".
+    The Y-axis shows "Amplitude (dB)" and the X-axis shows "Frequency (Hz)". A legend
+    labeled "Power Spectrum" appears in the top-right corner. Grid lines are subtle
+    and appropriately transparent.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title, axis labels, and tick marks are all clearly readable. Font
+          sizes are appropriately large for the 4800x2700 resolution. Minor: Y-axis
+          labels are slightly condensed.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated and
+          readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: The areaspline line is well-visible with appropriate line width.
+          The peaks are clearly distinguishable from the noise floor. Gradient fill
+          enhances visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe blue (#306998) with gradient fill. No red-green
+          conflicts.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with appropriate margins. The plot fills
+          a good portion of the canvas. Legend placement is reasonable.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Frequency (Hz)" and
+          "Amplitude (dB)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha 0.1. Legend is placed appropriately but
+          could be larger.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: AreaSpline is appropriate for frequency spectrum visualization, showing
+          continuous amplitude across frequencies.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency correctly on X-axis, amplitude in dB on Y-axis.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows frequency domain representation, identifies dominant frequencies
+          (440, 880, 1320 Hz), includes noise floor.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-2000 Hz range displayed, Y-axis covers the full amplitude
+          range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Power Spectrum".
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "spectrum-basic · highcharts · pyplots.ai".
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows fundamental frequency (440 Hz), harmonics (880, 1320 Hz), and
+          noise floor. Demonstrates key aspects of spectrum analysis.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Audio signal analysis with A4 musical note fundamental and harmonics
+          is a real, comprehensible scenario commonly used in audio engineering.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency range (0-2000 Hz) is appropriate for audio. dB scale is
+          correct. Amplitude values are realistic.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data generation → FFT → chart configuration
+          → export. No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible noise.
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports are used, but import ordering could be cleaner (stdlib,
+          third-party, local).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts_core APIs.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png (correct), but zip(..., strict=True) is only available
+          in Python 3.10+.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses AreaSplineSeries with gradient fill, custom chart styling, and
+          proper Highcharts export workflow. Good use of interactive features with
+          HTML export.
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/letsplot.yaml b/plots/spectrum-basic/metadata/letsplot.yaml
index 52f30cfead..4655321704 100644
--- a/plots/spectrum-basic/metadata/letsplot.yaml
+++ b/plots/spectrum-basic/metadata/letsplot.yaml
@@ -23,3 +23,174 @@ review:
   - Grid could benefit from slightly more transparency (alpha currently at 0.3 which
     is acceptable but on the higher end)
   - HTML output path handling could be cleaner (uses path="." parameter)
+  image_description: The plot displays a frequency spectrum with three prominent peaks
+    at 50 Hz (~54 dB), 120 Hz (~48 dB), and 200 Hz (~44 dB), corresponding to the
+    signal components defined in the code. A blue line traces the amplitude across
+    the frequency range (0-300 Hz), with a semi-transparent light blue area fill underneath.
+    The noise floor fluctuates around 5-15 dB with realistic variation. The title
+    "spectrum-basic · letsplot · pyplots.ai" is displayed at the top. Axis labels
+    clearly show "Frequency (Hz)" and "Amplitude (dB)". The grid uses subtle gray
+    lines. Overall layout is well-balanced with the plot filling the canvas appropriately.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line and area fill are clearly visible; line thickness is appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Frequency (Hz)" and "Amplitude (dB)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but no legend present (not strictly
+          needed for single-series plot, minor deduction)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum line plot with area fill
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency on X-axis, Amplitude (dB) on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows frequency domain representation, dB scale, clear peaks at dominant
+          frequencies
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range (0-300 Hz, -15 to 55 dB)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, appropriate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrum-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows dominant peaks (50, 120, 200 Hz), harmonic relationship, and
+          noise floor; could show wider frequency range for more comprehensive coverage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Signal processing scenario with fundamental frequency, harmonics,
+          and noise is realistic and educational
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: dB values are reasonable; frequency range limited to 300 Hz but appropriate
+          for the signal
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but also saves plot.html (minor issue with path
+          parameter usage)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (geom_line + geom_area), theme_minimal, and proper
+          ggsize/ggsave with scale parameter; could leverage more lets-plot specific
+          features like tooltips
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/matplotlib.yaml b/plots/spectrum-basic/metadata/matplotlib.yaml
index b58f56da6c..08a11c1f1a 100644
--- a/plots/spectrum-basic/metadata/matplotlib.yaml
+++ b/plots/spectrum-basic/metadata/matplotlib.yaml
@@ -25,3 +25,179 @@ review:
   - No legend present, though for this single-series plot it is acceptable
   - Could use more distinctive matplotlib features like spine customization or logarithmic
     frequency scale option
+  image_description: The plot displays a frequency spectrum with amplitude (dB) on
+    the Y-axis ranging from -60 to 10, and frequency (Hz) on the X-axis ranging from
+    0 to 300. The spectrum line is rendered in a blue color (#306998) with a semi-transparent
+    fill beneath the curve. Three prominent peaks are clearly visible at 50 Hz, 120
+    Hz, and 200 Hz, each marked with yellow circular markers and annotated with their
+    frequency values. Vertical yellow dashed lines highlight each peak frequency.
+    The noise floor fluctuates around -40 to -50 dB. The title follows the correct
+    format "spectrum-basic · matplotlib · pyplots.ai". The overall layout is clean
+    with balanced margins and a subtle grid.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, annotations positioned with offset
+          to avoid collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 2.5 is good, markers at s=200 are visible, fill provides
+          good emphasis. Slight deduction as line could be slightly thicker for 4800×2700
+          canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) combination is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with tight_layout(), balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Frequency (Hz)" and "Amplitude (dB)" - descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3. No legend present but none is required
+          for this single-series plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency on X-axis, amplitude (dB) on Y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows multiple frequency components, uses dB scale, peak annotation
+          as suggested in Notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axis limits appropriately set (0-300 Hz, -60 to
+          10 dB)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrum-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows fundamental frequency (50 Hz), harmonic (120 Hz), additional
+          component (200 Hz), and noise floor. Could show wider frequency range or
+          more complex harmonics
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Synthetic signal with multiple frequency components is a realistic
+          signal processing scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (1000 Hz sample rate, dB range -60 to 10), though
+          peak amplitudes could vary more
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses standard matplotlib features (plot, fill_between, scatter, annotate,
+          axvline). Could leverage more advanced features like twin axes or custom
+          tick formatting
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/plotly.yaml b/plots/spectrum-basic/metadata/plotly.yaml
index a6249d8df9..d0fbdbda0f 100644
--- a/plots/spectrum-basic/metadata/plotly.yaml
+++ b/plots/spectrum-basic/metadata/plotly.yaml
@@ -22,3 +22,179 @@ review:
   - Does not leverage Plotly's interactive features (hover tooltips showing exact
     dB values, zoom capabilities display)
   - Grid transparency could be more subtle (currently visible but not obtrusive)
+  image_description: The plot displays a frequency spectrum showing amplitude (dB)
+    on the Y-axis (ranging from -80 to 0) versus frequency (Hz) on the X-axis (ranging
+    from 0 to 500). The spectrum is rendered as a blue line (#306998) with a light
+    blue fill underneath. Three prominent peaks are clearly visible at 50 Hz, 120
+    Hz, and 300 Hz, each annotated with yellow arrow pointers and labeled with their
+    respective frequencies. The 50 Hz peak is the strongest at approximately -3 dB,
+    the 120 Hz peak reaches about -10 dB, and the 300 Hz peak is around -15 dB. A
+    noisy baseline fluctuates between -45 and -70 dB. The title "spectrum-basic ·
+    plotly · pyplots.ai" is centered at the top. The background is white with subtle
+    gray grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; annotations are well-positioned above peaks
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width is appropriate for the frequency resolution; peaks are
+          clearly distinguishable from noise floor
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue color scheme with yellow annotations provides good contrast
+          and is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills the canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Frequency (Hz)" and
+          "Amplitude (dB)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present but alpha is slightly high; no legend needed for
+          single trace
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency correctly on X-axis, amplitude (dB) on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: FFT computation, dB scale, peak annotations as suggested in spec
+          notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis limited to 0-500 Hz showing the meaningful frequency range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single trace, no legend needed; trace named appropriately
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrum-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple frequency components at different amplitudes, plus
+          realistic noise floor
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Signal processing scenario with plausible 50/120/300 Hz components
+          (power line frequency, harmonics)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency range (0-500 Hz) and dB values (-80 to 0) are realistic
+          for audio/electrical signals
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data generation → FFT → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: While the implementation is correct, it doesn't leverage Plotly's
+          interactive features in the static output; the fill area and annotations
+          are basic plotly features but nothing that showcases Plotly's unique strengths
+          like hover info customization or range sliders
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/plotnine.yaml b/plots/spectrum-basic/metadata/plotnine.yaml
index 7f0710fb8e..7b7da4ea16 100644
--- a/plots/spectrum-basic/metadata/plotnine.yaml
+++ b/plots/spectrum-basic/metadata/plotnine.yaml
@@ -23,3 +23,179 @@ review:
   weaknesses:
   - Grid lines not visible because panel_grid_major=element_text(alpha=0.3) uses wrong
     element type - should be element_line not element_text
+  image_description: The plot displays a frequency spectrum on a logarithmic X-axis
+    ranging from 10 Hz to 1000 Hz. The Y-axis shows amplitude in dB, ranging from
+    approximately -80 dB to 0 dB. The plot uses a blue (#306998) line to trace the
+    spectrum. Clear frequency peaks are visible at 50 Hz (fundamental, ~0 dB), 100
+    Hz (2nd harmonic, ~-5 dB), 150 Hz (3rd harmonic, ~-10 dB), and 500 Hz (high-frequency
+    component, ~-16 dB). The noise floor is visible around -50 to -60 dB with expected
+    fluctuations at higher frequencies. The title "spectrum-basic · plotnine · pyplots.ai"
+    is displayed at the top in bold. Axis labels show "Frequency (Hz)" and "Amplitude
+    (dB)" with proper units. Log tick marks are visible on the X-axis bottom. The
+    background is clean with a minimal theme.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold 24pt, axis titles 20pt, tick labels 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is visible and appropriate thickness (1.2), slight deduction
+          as the line could be marginally thicker for the noisy high-frequency region
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well; minor margin asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Frequency (Hz)" and
+          "Amplitude (dB)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid lines are not visible despite `panel_grid_major` being set;
+          no legend needed for single series but grid should be visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency on X-axis, amplitude on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Logarithmic frequency axis, dB scale amplitude, clear labels with
+          units, identifiable peaks
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full frequency range (10-1000 Hz) displayed, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrum-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows fundamental frequency, harmonics (2nd, 3rd), isolated high-frequency
+          component, and noise floor - demonstrates all aspects of frequency spectrum
+          analysis
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulates mechanical vibration signal with motor rotation frequency
+          and harmonics - realistic engineering scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: FFT with 4096 Hz sample rate, 50 Hz fundamental, dB scale ranging
+          from ~0 to -80 dB - all physically sensible values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data generation → FFT → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses `annotation_logticks` which is a nice plotnine feature, but
+          the `panel_grid_major=element_text(alpha=0.3)` setting uses the wrong element
+          type (should be `element_line`, not `element_text`), so grid lines do not
+          render; this is a code bug that reduces the score
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/pygal.yaml b/plots/spectrum-basic/metadata/pygal.yaml
index 1df7a56691..d768d585ee 100644
--- a/plots/spectrum-basic/metadata/pygal.yaml
+++ b/plots/spectrum-basic/metadata/pygal.yaml
@@ -28,3 +28,183 @@ review:
     as suggested in the specification
   - Font sizes in the custom style are quite large (title_font_size=72) which may
     be excessive; could be slightly reduced for better proportions
+  image_description: 'The plot displays a frequency spectrum visualization with three
+    distinct peaks at the expected frequencies: 50 Hz (dominant peak reaching ~10
+    dB), 120 Hz (secondary peak at ~2 dB), and 200 Hz (tertiary peak at ~-5 dB). The
+    visualization uses a filled area chart in Python Blue (#306998) against a white
+    background. The noise floor is visible around -30 to -40 dB with natural variation.
+    The title "spectrum-basic · pygal · pyplots.ai" appears at the top. Axis labels
+    clearly show "Frequency (Hz)" on the X-axis (0-300 Hz range) and "Amplitude (dB)"
+    on the Y-axis (-60 to 20 dB range). Grid lines are subtle and non-distracting.
+    The layout is well-balanced with good use of canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          large canvas size. Font sizes are appropriately scaled.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and tick values are cleanly
+          spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'The filled area chart clearly shows the spectrum pattern with peaks
+          and noise floor visible. Minor: the fill makes it slightly harder to see
+          fine structure in noisy regions.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (blue) with good contrast against white background;
+          no colorblind issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins. Plot area fills most
+          of the canvas appropriately.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Both axes have descriptive labels with units: "Frequency (Hz)" and
+          "Amplitude (dB)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle and appropriate. Legend is hidden (appropriate for
+          single series), but minor: no legend means the series name is not visible.'
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum plot showing amplitude vs frequency.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency correctly mapped to X-axis, amplitude (dB) to Y-axis.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Shows frequency components, dB scale, clear peaks. Minor: spec suggests
+          highlighting peak frequencies or annotating dominant components - not implemented.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all relevant data; 0-300 Hz range captures all signal components.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend hidden (appropriate for single series spectrum).
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "spectrum-basic · pygal · pyplots.ai".
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple frequency components at different amplitudes, noise
+          floor, and dB scale representation. Shows the key aspects of a spectrum
+          plot.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Signal processing scenario with realistic frequencies (50 Hz, 120
+          Hz, 200 Hz) that could represent real-world signals like electrical interference,
+          machinery vibration, etc.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequency range (0-300 Hz) and amplitude range (-60 to 20 dB) are
+          realistic for signal analysis.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure with inline code.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: numpy, pygal, and Style.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses `strict=True` in zip() which is Python 3.10+ only; minor compatibility
+          concern.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of pygal's XY chart type, custom Style for large canvas,
+          fill option for area chart effect. Could have used more pygal-specific features
+          like tooltips or interactive elements.
+  verdict: APPROVED
diff --git a/plots/spectrum-basic/metadata/seaborn.yaml b/plots/spectrum-basic/metadata/seaborn.yaml
index c30abb7309..861007af80 100644
--- a/plots/spectrum-basic/metadata/seaborn.yaml
+++ b/plots/spectrum-basic/metadata/seaborn.yaml
@@ -24,3 +24,174 @@ review:
     more distinctive seaborn features
   - Grid styling uses alpha=0.3 which is acceptable but slightly at the edge of the
     0.2-0.4 recommendation
+  image_description: The plot displays a frequency spectrum with a blue line showing
+    amplitude (in dB) on the y-axis ranging from -60 to 10, and frequency (Hz) on
+    the x-axis from 0 to 300. The title "spectrum-basic · seaborn · pyplots.ai" is
+    clearly displayed at the top. Four distinct peaks are visible at approximately
+    50 Hz (~7 dB), 100 Hz (~0 dB), 150 Hz (~-2 dB), and 220 Hz (~-10 dB). Yellow dashed
+    vertical lines mark these peak frequencies. The area under the curve is filled
+    with a light blue/teal semi-transparent fill. The background noise floor sits
+    around -30 to -40 dB with natural variation. Grid lines are subtle and dashed.
+    Overall, the plot has a clean, professional appearance with good use of the canvas
+    space.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line is visible with good width (2.5), fill adds clarity, peaks are
+          distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme (#306998) with good contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with 16:9 aspect ratio, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Clear labels with units: "Frequency (Hz)" and "Amplitude (dB)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend present; however, for a
+          single-series spectrum plot, no legend is needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct frequency spectrum/line plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=frequency, Y=amplitude correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has FFT-based spectrum, dB scale, peak highlighting with vertical
+          lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Focused range 0-300 Hz shows all relevant data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, properly handled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "spectrum-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows fundamental (50 Hz) with harmonics (100, 150 Hz), additional
+          component (220 Hz), and noise floor - demonstrates key spectrum features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Machinery vibration analysis scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: FFT bins at 1000 Hz sample rate are appropriate; dB scale range (-60
+          to 10) is realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only used imports: matplotlib, numpy, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" but in current directory, not relative path issue
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's lineplot and set_context for styling, but could use more
+          distinctive seaborn features
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/altair.yaml b/plots/stem-basic/metadata/altair.yaml
index d1e6748f28..99f59eb526 100644
--- a/plots/stem-basic/metadata/altair.yaml
+++ b/plots/stem-basic/metadata/altair.yaml
@@ -26,3 +26,174 @@ review:
   - Axis labels lack units (e.g., Amplitude (a.u.) or Sample Index (n))
   - Data scenario could be more specific (e.g., audio sample, ECG signal) rather than
     generic damped oscillation
+  image_description: The plot displays a basic stem plot representing a damped oscillation
+    signal with 30 data points. Blue circular markers are positioned at each data
+    value, connected by thin blue vertical lines (stems) extending down to a black
+    horizontal baseline at y=0. The stems are rendered in a pleasant blue color (#306998)
+    with white-stroked circular markers at their tips. The title "stem-basic · altair
+    · pyplots.ai" appears at the top center. The x-axis is labeled "Sample Index"
+    ranging from 0 to 30, and the y-axis is labeled "Amplitude" ranging from approximately
+    -0.8 to 1.2. A subtle dashed grid (alpha 0.3) provides visual reference. The damped
+    oscillation pattern is clearly visible, starting with high amplitude at index
+    0 (~1.0) and decaying toward zero with oscillating positive and negative values.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (size=300) and stems (strokeWidth=2.5) well-sized for 30
+          data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: '"Sample Index" and "Amplitude" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha 0.3 with dashed style, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stem plot with markers and stems from baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=sequence index, Y=signal amplitude correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thin stems, visible markers, baseline
+          at y=0'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend required, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "stem-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows full oscillation behavior: positive/negative values, decay,
+          varying amplitudes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Damped oscillation is plausible but generic signal processing example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Amplitude values (-0.7 to 1.0) and 30 samples are realistic for discrete
+          signals
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only uses altair, numpy, pandas - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but also saves HTML (minor - HTML is expected
+          for Altair)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive Altair features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of mark_rule for stems, mark_circle for markers, y/y2
+          encoding for range, layered composition, tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/bokeh.yaml b/plots/stem-basic/metadata/bokeh.yaml
index e01cfd6747..6d35b735dc 100644
--- a/plots/stem-basic/metadata/bokeh.yaml
+++ b/plots/stem-basic/metadata/bokeh.yaml
@@ -23,3 +23,172 @@ review:
   - Axis labels missing units (e.g., "Amplitude (V)" or "Amplitude (a.u.)")
   - Does not leverage Bokeh interactive features like HoverTool to show exact values
   - Bokeh toolbar visible in output image could be hidden for cleaner static export
+  image_description: The plot displays a stem plot showing a damped oscillation signal
+    with 30 data points. Blue vertical stems (#306998) extend from a dashed gray baseline
+    at y=0 to circular markers at each data point. The signal starts with high amplitude
+    values (~1.5) and decays over time, oscillating between positive and negative
+    values before settling near zero. The title "stem-basic · bokeh · pyplots.ai"
+    appears in the top-left. Axes are labeled "Sample Index" (x) and "Amplitude" (y).
+    The Bokeh toolbar is visible in the top-right corner. Grid lines are subtle with
+    dashed styling.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable; font sizes are
+          good (36pt title, 28pt labels, 22pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (size=25) and stems (line_width=4) are clearly visible; markers
+          could be slightly larger for optimal viewing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme, excellent contrast against white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but missing units (could be "Amplitude (V)" or
+          similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3) and dashed, but Bokeh toolbar slightly
+          distracts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stem plot implementation using segment + scatter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (sample index) and Y (amplitude) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: stems, markers at top, baseline at y=0'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (appropriate)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "stem-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive and negative values, damping pattern; could show more
+          varied discrete events
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Damped oscillation is a plausible signal processing scenario, though
+          labeled generically
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Amplitude values are reasonable for a normalized signal
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and segment/scatter methods correctly, but
+          does not leverage interactive features like hover tooltips or tools configuration
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/highcharts.yaml b/plots/stem-basic/metadata/highcharts.yaml
index 1ff0a84feb..2dd9808cca 100644
--- a/plots/stem-basic/metadata/highcharts.yaml
+++ b/plots/stem-basic/metadata/highcharts.yaml
@@ -23,3 +23,181 @@ review:
   weaknesses:
   - Axis labels lack units (e.g., "Amplitude (a.u.)" or "Sample Index (n)")
   - Image height is 2661px instead of exactly 2700px (minor dimension discrepancy)
+  image_description: The plot displays a stem plot showing a damped oscillation signal
+    with 30 data points. Each data point is represented by a blue (#306998) circular
+    marker connected to the y=0 baseline by a thin vertical stem line. The title "stem-basic
+    · highcharts · pyplots.ai" appears at the top. The x-axis is labeled "Sample Index"
+    (0-29) and the y-axis is labeled "Amplitude" (ranging from approximately -0.8
+    to 1.15). The plot shows a classic damped oscillation pattern starting with high
+    amplitude (~1.0) and decaying over time with alternating positive and negative
+    values. A subtle grid is visible with light gray lines. The baseline at y=0 is
+    clearly marked with a darker horizontal line.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable. Font
+          sizes are appropriate for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized (radius 10) with good visibility.
+          Stems are thin but clearly visible. 30 data points with this marker size
+          is optimal.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme (#306998 blue) with white marker borders. No
+          colorblind issues.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space. Minor deduction: slightly more whitespace
+          at the bottom than necessary.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Sample Index", "Amplitude") but no units provided.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha 0.1), legend disabled which is appropriate for
+          single series.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stem plot with vertical lines from baseline to markers.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is sample index, Y is amplitude value. Correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thin vertical stems, circular markers
+          at top, baseline at y=0.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 30 data points visible within axis range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for single-series plot.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "stem-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: 'Shows damped oscillation with both positive and negative values.
+          Good demonstration of stem plot capabilities. Minor: could show more variety
+          in the oscillation pattern.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Damped oscillation is a realistic signal processing scenario. Context
+          is appropriate.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Amplitude values between -0.8 and 1.15 are realistic for normalized
+          signal data.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → series → export. No
+          functions/classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png, but the output is not exactly 4800x2700 (actual
+          size appears to be 4800x2661 based on image metadata).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts plotLines for baseline, scatter series for markers,
+          line series for stems. Good use of Highcharts options system but no advanced
+          interactive features enabled.
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/letsplot.yaml b/plots/stem-basic/metadata/letsplot.yaml
index 99f1cb1984..fce4d667c9 100644
--- a/plots/stem-basic/metadata/letsplot.yaml
+++ b/plots/stem-basic/metadata/letsplot.yaml
@@ -23,3 +23,173 @@ review:
   weaknesses:
   - Could add units to axis labels (e.g., "Amplitude (a.u.)" for arbitrary units)
   - Does not leverage lets-plot interactive features like tooltips
+  image_description: The plot displays a stem plot with 30 data points representing
+    a damped oscillation signal. Blue vertical stems (#306998) extend from a black
+    horizontal baseline at y=0 to blue circular markers at the top of each stem. The
+    x-axis is labeled "Sample Index" (ranging 0-30), and the y-axis is labeled "Amplitude"
+    (ranging approximately -0.7 to 1.1). The title "stem-basic · letsplot · pyplots.ai"
+    is positioned at the top-left. The plot uses a clean minimal theme with light
+    gray dashed grid lines. The data clearly shows the damped oscillation pattern
+    — high-amplitude oscillations at the start that progressively decay toward zero
+    as the sample index increases.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at proper
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stems and markers are well-sized for the 30 data points, appropriate
+          alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but no units ("Amplitude" could specify units)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle with dashed lines, but baseline could have better
+          contrast
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stem plot implementation using segments + points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned (sample index vs amplitude)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: vertical stems, markers at top, baseline
+          at y=0'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "stem-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows damped oscillation with both positive and negative values,
+          good variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Damped oscillation is a realistic signal processing scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values are sensible but somewhat abstract without real-world context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar correctly with geom_segment + geom_point combination,
+          but doesn't leverage any uniquely lets-plot features like interactivity
+          or tooltips
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/matplotlib.yaml b/plots/stem-basic/metadata/matplotlib.yaml
index ff01fe23cb..a20a945650 100644
--- a/plots/stem-basic/metadata/matplotlib.yaml
+++ b/plots/stem-basic/metadata/matplotlib.yaml
@@ -23,3 +23,165 @@ review:
   weaknesses:
   - Axis labels lack units (e.g., "Sample Index (n)" or "Amplitude (V)" would score
     higher)
+  image_description: The plot displays a stem plot with 30 data points showing a damped
+    oscillation signal. Blue (#306998) vertical stems extend from a black baseline
+    at y=0 to circular markers with white edges at each data point. The x-axis is
+    labeled "Sample Index" (0-30) and the y-axis "Amplitude" (-0.75 to 1.0). The title
+    correctly follows the format "stem-basic · matplotlib · pyplots.ai". A subtle
+    dashed grid (alpha=0.3) provides reference without distraction. The signal demonstrates
+    clear damping behavior, starting at amplitude ~1.0 and decaying over time with
+    oscillating positive and negative values.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and stems clearly visible, sizing appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but missing units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stem plot using ax.stem()
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thin stems, clear markers, baseline at y=0
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses `{spec-id} · {library} · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows damped oscillation with positive/negative values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Signal processing scenario matches spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reasonable amplitude values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of ax.stem() with plt.setp() customization
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/plotly.yaml b/plots/stem-basic/metadata/plotly.yaml
index 9bce46327b..2809588eb1 100644
--- a/plots/stem-basic/metadata/plotly.yaml
+++ b/plots/stem-basic/metadata/plotly.yaml
@@ -26,3 +26,166 @@ review:
     quantities)
   - Grid alpha (0.1) is very subtle, could be slightly more visible (0.2-0.3) for
     better readability
+  image_description: The plot displays a basic stem plot showing a damped oscillation
+    signal with 30 data points. Blue stems (#306998) extend vertically from a dark
+    horizontal baseline at y=0 to circular markers at each data value. The markers
+    are well-sized with white borders for visibility. Title "stem-basic · plotly ·
+    pyplots.ai" is centered at the top. X-axis is labeled "Sample Index" (ranging
+    0-30), Y-axis is labeled "Amplitude" (ranging approximately -0.8 to 1.0). The
+    plot uses a clean white background (plotly_white template) with subtle gray gridlines.
+    The data shows the characteristic decay pattern of a damped cosine wave with small
+    random noise, demonstrating both positive and negative amplitude values.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable at full size, appropriate font scaling
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: markers well-sized (16px), stems visible (width 2)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: single blue color scheme, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good canvas utilization, minor extra whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: descriptive labels but missing units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: grid subtle, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct stem plot with vertical lines and markers
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: all spec features present (stems, markers, baseline at y=0)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: axes show all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: no legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct {spec-id} · {library} · pyplots.ai format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows positive/negative values, decay pattern
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: damped oscillation is plausible signal processing scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: sensible normalized amplitude values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only used imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: interactive hover templates with amplitude formatting, HTML export
+  verdict: APPROVED
diff --git a/plots/stem-basic/metadata/seaborn.yaml b/plots/stem-basic/metadata/seaborn.yaml
index 1368c7bafd..6854bf40a9 100644
--- a/plots/stem-basic/metadata/seaborn.yaml
+++ b/plots/stem-basic/metadata/seaborn.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Limited use of seaborn distinctive features beyond scatterplot; the implementation
     primarily uses matplotlib for the stem visualization
+  image_description: The plot displays a stem plot showing a damped sinusoidal impulse
+    response. It has 30 data points with yellow circular markers (with blue outlines)
+    connected by blue vertical stems to a horizontal baseline at y=0. The x-axis is
+    labeled "Sample Index (n)" ranging from 0 to 30, and the y-axis is labeled "Amplitude"
+    ranging from approximately -1.0 to 1.8. The title reads "stem-basic · seaborn
+    · pyplots.ai" in the correct format. The plot shows a characteristic damped oscillation
+    pattern with high amplitude early samples that decay exponentially while oscillating.
+    A subtle gray dashed grid is visible in the background. The top and right spines
+    have been removed for a cleaner appearance.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (s=300) with good contrast; stems are visible
+          at linewidth 2.5
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with tight_layout, slight excess whitespace on right
+          side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sample Index (n)" and "Amplitude"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), and no legend is needed for this single-series
+          plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct stem plot with markers connected to baseline by vertical
+          lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X is sample index, Y is amplitude - correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Stems, markers, baseline at y=0 all present as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series, N/A
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "stem-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows damped oscillation with both positive and negative values;
+          demonstrates stem plot capabilities well
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Damped sinusoidal impulse response is a realistic signal processing
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Amplitude values are reasonable for a normalized signal response
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dpi and bbox_inches
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses sns.scatterplot for markers which is appropriate, but the main
+          visualization relies heavily on matplotlib's vlines and axhline. Seaborn
+          doesn't have a native stem plot function, so this hybrid approach is acceptable
+          but does not showcase seaborn's distinctive features.
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/altair.yaml b/plots/step-basic/metadata/altair.yaml
index eec91a5b8a..ee1adf1c63 100644
--- a/plots/step-basic/metadata/altair.yaml
+++ b/plots/step-basic/metadata/altair.yaml
@@ -24,3 +24,157 @@ review:
   weaknesses:
   - Could add tooltips for enhanced interactivity since Altair excels at this
   - Grid styling could be slightly more refined (gridDash appears somewhat prominent)
+  image_description: The plot displays a step chart showing monthly cumulative sales
+    data from January to December. The chart uses a blue color (#306998) for both
+    the stepped line and filled circular markers at each data point. The step pattern
+    uses "step-after" interpolation, creating the characteristic stair-step appearance
+    where values remain constant until the next data point. The X-axis shows abbreviated
+    month names (Jan-Dec) with horizontal labels, while the Y-axis displays "Cumulative
+    Sales (thousands $)" ranging from 0 to approximately 180. The title "step-basic
+    · altair · pyplots.ai" appears at the top in a clear, readable font. Subtle dashed
+    grid lines aid in reading values. The overall layout shows good canvas utilization
+    with balanced margins.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line stroke width (4) and marker size (200) are well-adapted for
+          12 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "(thousands $)", X-axis labeled
+          "Month"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/stair plot using step-after interpolation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Month (categorical), Y=Cumulative Sales (quantitative) correctly
+          assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step pattern present, markers at data points as suggested in spec
+          notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, Y-axis starts at 0 and extends appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "step-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear step pattern with cumulative increases; could benefit
+          from showing varied step sizes more dramatically
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a real, comprehensible scenario matching
+          spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values (12-168 thousands) are plausible for annual sales; growth
+          pattern is realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values used)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, pandas, os, sys for workaround)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/bokeh.yaml b/plots/step-basic/metadata/bokeh.yaml
index a4d0e1603e..67ae53cb1c 100644
--- a/plots/step-basic/metadata/bokeh.yaml
+++ b/plots/step-basic/metadata/bokeh.yaml
@@ -24,3 +24,172 @@ review:
   - Both PNG and HTML outputs generated for static and interactive viewing
   weaknesses:
   - Grid styling could be more subtle (dashed lines are slightly prominent)
+  image_description: The plot displays a step chart showing cumulative sales over
+    12 months. The visualization uses a blue step line (#306998) with yellow circular
+    markers (#FFD43B) at each data point. The chart has a light gray background (#fafafa)
+    with dashed grid lines at 0.3 alpha. The title "step-basic · bokeh · pyplots.ai"
+    appears at the top left. The x-axis is labeled "Month" (1-12) and the y-axis is
+    labeled "Cumulative Sales (units)" (0-200). The step pattern clearly shows the
+    stair-step progression from 15 units in January to 195 units in December, with
+    horizontal lines extending to the right of each data point ("after" mode).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 36pt, tick labels at 28pt - all highly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 and marker size of 18 are well-suited for the data
+          density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but some empty space on the right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(units)", X-axis "Month" is descriptive
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha 0.3 is appropriate, but no legend present (acceptable
+          since single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/stair plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Month, Y=Cumulative Sales correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step style used, markers at data points as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes accommodate full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "step-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows increasing cumulative pattern well, but only demonstrates one
+          step mode ("after")
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a perfect real-world scenario for step
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values (15-195) are realistic for a yearly cumulative total
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and Bokeh's native step() glyph with mode parameter,
+          plus scatter for markers. Good but could showcase more Bokeh-specific features
+          like hover tools.
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/highcharts.yaml b/plots/step-basic/metadata/highcharts.yaml
index 25d5679648..6cfd19d4ba 100644
--- a/plots/step-basic/metadata/highcharts.yaml
+++ b/plots/step-basic/metadata/highcharts.yaml
@@ -24,3 +24,170 @@ review:
     adjustment
   - Only demonstrates one step style (left) when spec mentions pre/post/mid options
   - Layout has slightly unbalanced margins
+  image_description: The plot displays a step chart showing cumulative sales data
+    over 12 months (January through December). The chart uses a deep blue color (#306998)
+    for the step line with circular markers at each data point. The title "step-basic
+    · highcharts · pyplots.ai" appears at the top in bold. The Y-axis is labeled "Cumulative
+    Sales ($K)" ranging from 0 to 825, and the X-axis is labeled "Month" with all
+    12 month abbreviations displayed. The step pattern shows values remaining constant
+    (horizontal lines) until the next month, then jumping vertically to the new value
+    - creating the characteristic staircase appearance. Grid lines are visible on
+    the Y-axis. The data shows a steady upward trend from $45K in January to $780K
+    in December.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width and marker size are well-suited for the data density (12
+          points)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind concerns
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, though slight imbalance with larger left margin
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "($K)", X-axis clearly labeled "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is enabled but not visible in the image; grid is subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/stair chart type with "left" step style
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (cumulative sales) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step style implemented, markers at data points, grid lines present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Series named "Cumulative Sales"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "step-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows step pattern well, but only demonstrates one step style (left/post)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a realistic and comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales figures ($45K-$780K) are realistic for annual cumulative sales
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts step line feature with marker customization, but
+          could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/letsplot.yaml b/plots/step-basic/metadata/letsplot.yaml
index e0f12c14e3..9c4100cbf9 100644
--- a/plots/step-basic/metadata/letsplot.yaml
+++ b/plots/step-basic/metadata/letsplot.yaml
@@ -25,3 +25,172 @@ review:
     cleaner
   - Markers could be slightly larger (size=6-7) for better visibility at the data
     points
+  image_description: The plot displays a step chart showing cumulative monthly sales
+    data over 12 months. A dark blue (#306998) step line creates a clear stair-step
+    pattern ascending from approximately $45K in month 1 to $822K in month 12. Bright
+    yellow (#FFD43B) circular markers highlight each data point where changes occur.
+    The title "step-basic · letsplot · pyplots.ai" appears at the top left. The x-axis
+    is labeled "Month" with ticks from 1-12, and the y-axis is labeled "Cumulative
+    Sales ($K)" ranging from 50 to 850. A subtle gray dashed grid aids value reading.
+    The plot fills the canvas well with balanced proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step line is well-sized; markers are visible but could be slightly
+          larger for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe and high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Cumulative Sales ($K)", "Month"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate but no legend present (though not strictly needed
+          for single series)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/stair plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Month (sequential), Y=Cumulative Sales correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step pattern with markers at data points as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series, no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "step-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows cumulative/monotonic increasing pattern well; could show more
+          variation in step sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a perfect real-world scenario matching
+          spec examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for sales data; units in thousands are appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but export path uses "." which works
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom_step with direction parameter, theme_minimal,
+          but no advanced lets-plot specific features like tooltips or interactivity
+          in the static output
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/matplotlib.yaml b/plots/step-basic/metadata/matplotlib.yaml
index 4efbd622db..0afe400315 100644
--- a/plots/step-basic/metadata/matplotlib.yaml
+++ b/plots/step-basic/metadata/matplotlib.yaml
@@ -23,3 +23,177 @@ review:
   - Only demonstrates post step style; could showcase more step variations mentioned
     in spec
   - Could use fill_between to add visual distinction to the stepped area
+  image_description: The plot displays a step (stair-step) chart showing cumulative
+    monthly sales data over 12 months (January through December). The step line is
+    rendered in a blue color (#306998) with horizontal lines connected by vertical
+    segments creating the characteristic stair-step pattern. Yellow circular markers
+    (#FFD43B) with blue borders are placed at each data point to highlight where value
+    changes occur. The chart shows cumulative sales increasing from approximately
+    45 thousand dollars in January to about 849 thousand dollars by December. The
+    x-axis shows month abbreviations (Jan-Dec), and the y-axis displays "Cumulative
+    Sales (thousands $)" ranging from 0 to approximately 850. The title "step-basic
+    · matplotlib · pyplots.ai" appears at the top. A legend in the upper left identifies
+    "Cumulative Sales" (the step line) and "Monthly Totals" (the markers). A subtle
+    dashed grid aids in reading values.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, month labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step line at linewidth=3 is clearly visible, markers at s=200 are
+          appropriately sized for 12 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Month" and "Cumulative Sales (thousands
+          $)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid at alpha=0.3 is subtle, but legend placement in upper-left partially
+          overlaps with the data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step plot using ax.step() with stair-step pattern
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (sequential), Y=cumulative sales (values) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: ''post'' step style, markers at data
+          points, grid lines'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from near 0 to ~850, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify step line and markers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "step-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows cumulative increasing pattern well, but only demonstrates 'post'
+          style (spec mentions pre/mid/post options)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a realistic, comprehensible scenario
+          matching spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values in realistic range (45-105k monthly, 849k cumulative)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ax.step() correctly with where='post', but doesn't leverage
+          advanced matplotlib features like fill_between for step areas or custom
+          step formatting
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/plotly.yaml b/plots/step-basic/metadata/plotly.yaml
index 78481c7778..559f1633c9 100644
--- a/plots/step-basic/metadata/plotly.yaml
+++ b/plots/step-basic/metadata/plotly.yaml
@@ -15,3 +15,13 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot displays a step chart showing "Monthly Cumulative Sales"
+    over 12 months (Jan-Dec). The chart uses a dark blue line (Python Blue #306998)
+    with a stair-step pattern created using ''hv'' (horizontal-then-vertical) style.
+    Each data point is marked with a yellow circle (Python Yellow #FFD43B) with a
+    blue border. The y-axis shows "Cumulative Sales ($)" ranging from ~50k to ~375k,
+    and the x-axis shows months. The title follows the correct format: "Monthly Cumulative
+    Sales · step-basic · plotly · pyplots.ai". The legend is positioned in the upper
+    left corner with a semi-transparent background. Grid lines are subtle (light gray),
+    and the overall layout is clean with a white background (plotly_white template).'
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/plotnine.yaml b/plots/step-basic/metadata/plotnine.yaml
index 6302251f64..453cf68f23 100644
--- a/plots/step-basic/metadata/plotnine.yaml
+++ b/plots/step-basic/metadata/plotnine.yaml
@@ -25,3 +25,176 @@ review:
     use scale_x_continuous with breaks=range(1,13)
   - Data could show more dramatic variation in step heights to better demonstrate
     the plot type ability to show sudden changes
+  image_description: The plot displays a step chart with a blue (#306998) stair-step
+    line pattern showing cumulative sales over 12 months. Yellow (#FFD43B) circular
+    markers highlight each data point where values change. The x-axis shows "Month"
+    (1-12, displayed as continuous with decimal values like 2.5, 5.0, etc.), and the
+    y-axis shows "Cumulative Sales (thousands)" ranging from approximately 12 to 125.
+    The title correctly reads "step-basic · plotnine · pyplots.ai". The minimal theme
+    provides a clean background with subtle gray grid lines. The stair-step pattern
+    clearly demonstrates the "constant until next change" behavior characteristic
+    of step plots.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step line size=1.5 and point size=4 are well-adapted for 12 data
+          points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has units "(thousands)" but X-axis "Month" could specify integer
+          months
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: 'Grid is subtle (alpha 0.3), but no legend needed here - minor: grid
+          could be slightly more visible'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step plot using geom_step
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=month, Y=cumulative sales correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step pattern with markers at data points as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "step-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows step pattern well with flat periods (months 1-2, 3-4, 7-8,
+          9-10) and jumps; could show more dramatic plateau/jump variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a perfect real-world scenario for step
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 12-120K are realistic for sales; month axis shows decimal
+          values (2.5, 5.0) instead of integers which is slightly awkward for discrete
+          months
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no random seed comment/documentation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses plotnine's ggplot grammar correctly (geom_step, geom_point,
+          theme_minimal), but doesn't showcase advanced features like scale_x_continuous
+          with breaks for integer months
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/pygal.yaml b/plots/step-basic/metadata/pygal.yaml
index 649fe85577..59470fed6f 100644
--- a/plots/step-basic/metadata/pygal.yaml
+++ b/plots/step-basic/metadata/pygal.yaml
@@ -24,3 +24,177 @@ review:
   - Legend at bottom appears somewhat small and isolated from the main chart area
   - Could show more dramatic variation in step heights to better demonstrate the plot
     type utility
+  image_description: The plot displays a step chart showing cumulative monthly sales
+    throughout the year. It uses a blue line (#306998) with circular markers at each
+    data point, creating a clear stair-step pattern. The title "step-basic · pygal
+    · pyplots.ai" appears at the top in a readable font. The Y-axis is labeled "Cumulative
+    Sales ($K)" ranging from approximately 50 to 635, and the X-axis shows "Month"
+    with all 12 month abbreviations (Jan-Dec). Horizontal grid lines help trace values.
+    A legend at the bottom identifies the series as "Cumulative Sales". The background
+    is clean white with good contrast.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size. Font sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Month labels are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step pattern is clearly visible with appropriate line width and marker
+          sizes. The dots effectively highlight data points.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color series is colorblind-safe with good contrast against
+          white background.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization with balanced margins. Plot fills appropriate
+          portion of canvas.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Cumulative Sales ($K)" and "Month".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid lines are subtle. However, legend placement at bottom is functional
+          but somewhat isolated from the plot.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step plot implementation using horizontal-then-vertical pattern
+          (post-style).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X (months) and Y (cumulative sales) correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows step pattern with markers at data points. Grid lines present.
+          Missing explicit demonstration of different step styles mentioned in spec.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend label "Cumulative Sales" matches the data series.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "step-basic · pygal · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows cumulative growth pattern well with discrete jumps. Could benefit
+          from showing varying step sizes more dramatically.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly cumulative sales is a perfect real-world scenario for step
+          plots.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values (5K-35K cumulative) are realistic for annual business
+          data.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → chart configuration → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current pygal API usage.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Style customization and Line chart effectively. The
+          workaround for step visualization using intermediate points is clever but
+          pygal doesn't have native step chart support.
+  verdict: APPROVED
diff --git a/plots/step-basic/metadata/seaborn.yaml b/plots/step-basic/metadata/seaborn.yaml
index 84949d6293..58abd38db8 100644
--- a/plots/step-basic/metadata/seaborn.yaml
+++ b/plots/step-basic/metadata/seaborn.yaml
@@ -23,3 +23,174 @@ review:
   - Uses seaborn only for styling (sns.set_context) rather than seaborn plotting functions
     - this is essentially a matplotlib plot with seaborn theming
   - Legend label defined but not displayed (acceptable for single series)
+  image_description: The plot displays a step chart showing cumulative sales data
+    over 12 months (January through December). The chart uses a deep blue (#306998)
+    stepped line with horizontal segments connected by vertical risers, creating a
+    stair-step pattern. Yellow/gold circular markers (#FFD43B) with blue outlines
+    are placed at each data point where values change. The x-axis shows month abbreviations
+    (Jan-Dec), and the y-axis shows "Cumulative Sales ($K)" ranging from 0 to approximately
+    1200. The title "step-basic · seaborn · pyplots.ai" appears at the top. A subtle
+    dashed grid is visible in the background. The cumulative sales start at ~45 in
+    January and increase progressively to ~1100 in December.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (s=150) are appropriately sized for 12 data points; line
+          width (3) is clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow combination is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "($K)", X-axis is descriptive "Month"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3, dashed) but there is no legend despite
+          having a label in the code
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step/stair plot with 'post' style
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=months (sequential), Y=cumulative sales (values)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step lines with markers at data points, grid lines for tracing values
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, all data visible with 5% headroom
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for single series (label in code but not shown,
+          which is fine)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "step-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear stair-step pattern with varying step heights (seasonal
+          growth pattern)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Cumulative monthly sales is a real-world application mentioned in
+          spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sales values are realistic ($45K-$155K monthly, ~$1.1M cumulative
+          annually)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses matplotlib's ax.step() and ax.scatter() with seaborn only for
+          styling (sns.set_context). Does not use seaborn's actual plotting functions.
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/altair.yaml b/plots/streamgraph-basic/metadata/altair.yaml
index 5a56fdb62e..dbdd2a92f2 100644
--- a/plots/streamgraph-basic/metadata/altair.yaml
+++ b/plots/streamgraph-basic/metadata/altair.yaml
@@ -33,3 +33,180 @@ review:
   - Color palette uses Python Blue/Yellow prominently but Jazz (purple) and Classical
     (orange) could be slightly more saturated for better visibility against adjacent
     areas
+  image_description: 'The plot displays a streamgraph (centered stacked area chart)
+    showing monthly streaming hours by music genre over two years (Jan 2022 to Nov
+    2023). Six genres are visualized with smooth, flowing curves using basis spline
+    interpolation: Pop (steel blue), Rock (golden yellow), Hip-Hop (pink), Electronic
+    (green), Jazz (purple), and Classical (orange). The baseline is symmetrically
+    centered around the x-axis, creating the characteristic river-like appearance.
+    The x-axis shows time labels at -45° angle with "Time" label, the y-axis shows
+    "Streaming Hours (millions)" but without tick labels (appropriate for streamgraph
+    aesthetic). A legend on the right identifies each genre with colored dots. The
+    title follows the required format. The colors are distinct and visually harmonious.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt, legend at
+          18-20pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: X-axis labels rotated at -45° prevent overlap, no text overlaps anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stream areas with 0.85 opacity are clearly visible, smooth curves
+          are well-defined
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Six distinct colors that work well for colorblind users, good contrast
+          between adjacent areas
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills most of the space, legend well-positioned
+          on right; minor deduction for slight extra whitespace at top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Time" and "Streaming Hours (millions)" with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid removed (appropriate for streamgraph), legend well-placed, but
+          y-axis title is shown despite no y-axis values/ticks which is slightly inconsistent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph with centered baseline (stack='center')
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, category for color, value for y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth interpolation (basis), centered baseline, distinct colors,
+          legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, 24 months spanning full x-axis
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 6 genres correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"streamgraph-basic · altair · pyplots.ai" - correct format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in all categories with seasonal patterns and growth
+          trends; categories have different base sizes; minor deduction because the
+          flowing curves could show more dramatic variation between categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming hours by genre is a real, comprehensible scenario
+          matching spec's example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in tens to hundreds of millions of streaming hours are realistic;
+          slight deduction for y-axis label saying "millions" but showing raw values
+          that could cause confusion
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported and all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding with stack='center', basis interpolation,
+          tooltips, and interactive() for HTML version; could have used more advanced
+          features like selection/highlighting
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/bokeh.yaml b/plots/streamgraph-basic/metadata/bokeh.yaml
index c5d71aad2d..1b1d0a316d 100644
--- a/plots/streamgraph-basic/metadata/bokeh.yaml
+++ b/plots/streamgraph-basic/metadata/bokeh.yaml
@@ -26,3 +26,178 @@ review:
   - Legend text could be slightly larger for the canvas size
   - Missing HoverTool for interactivity which is a key Bokeh strength
   - Y-axis label says relative but could benefit from units
+  image_description: 'The plot displays a streamgraph (centered stacked area chart)
+    showing "Streaming Hours (relative)" over time from January 2022 to January 2024.
+    Six music genres are shown as flowing, stacked layers: Pop (Python blue #306998),
+    Rock (golden yellow #FFD43B), Hip-Hop (coral/salmon #E07A5F), Electronic (muted
+    teal/green #81B29A), Jazz (cream/tan #F2CC8F), and Classical (dark gray-blue #3D405B).
+    The baseline is centered around 0 on the y-axis, creating a symmetric river-like
+    appearance. The layers have smooth, flowing curves showing seasonal variations
+    over the 2-year period. A legend is positioned on the right side identifying each
+    genre. The title "streamgraph-basic · bokeh · pyplots.ai" appears at the top left.
+    X-axis shows time periods (Jan 2022 through Jan 2024), Y-axis shows "Streaming
+    Hours (relative)" ranging from approximately -100 to +100.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt - all perfectly
+          readable at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stream layers are well-sized, smooth curves visible with good fill
+          alpha (0.85)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast between adjacent layers
+          (no red-green reliance)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, though the legend spacing from the main plot could
+          be tighter
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Time", "Streaming Hours (relative)") but no
+          units on y-axis
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend well-placed but could be slightly
+          larger
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph with centered baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, stacked values centered around 0
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth interpolation, symmetric baseline, distinct colors, legend
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes encompass full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 6 genre labels correctly identified
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "streamgraph-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows variation over time, different category sizes, seasonal patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming by genre is a perfect real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 24 months of data with 6 genres, realistic streaming hour values
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: 'All imports used, but minor issue: Legend is imported separately
+          when it could be simplified'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also creates plot.html (minor)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, figure, patch, Legend, and export_png/save
+          appropriately, but doesn't leverage HoverTool for interactivity which would
+          be a distinctive Bokeh feature
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/highcharts.yaml b/plots/streamgraph-basic/metadata/highcharts.yaml
index 433460c310..7521b04614 100644
--- a/plots/streamgraph-basic/metadata/highcharts.yaml
+++ b/plots/streamgraph-basic/metadata/highcharts.yaml
@@ -24,3 +24,177 @@ review:
   - Layout could be tighter - generous margins mean the actual chart area is slightly
     smaller than optimal
   - X-axis lacks a descriptive label (though Month context is clear from labels)
+  image_description: |-
+    The plot displays a streamgraph (stacked area chart with centered baseline) showing music streaming trends over 24 months (Jan '23 to Dec '24). Five music genres are visualized as flowing, stacked layers:
+    - **Pop** (blue, #306998) - top layer, largest area, relatively stable with slight seasonal variation
+    - **Rock** (yellow, #FFD43B) - second layer, showing gradual decline over time
+    - **Hip-Hop** (purple, #9467BD) - middle layer, showing growth trend over the period
+    - **Electronic** (cyan, #17BECF) - fourth layer, with subtle seasonal oscillation
+    - **Jazz** (pink, #E377C2) - bottom layer, smallest and most stable
+
+    The chart has a white background with a clear title "Music Streaming Trends · streamgraph-basic · highcharts · pyplots.ai" and subtitle "Monthly streaming hours by genre (2023-2024)". X-axis shows month labels, y-axis is hidden (as typical for streamgraphs). A horizontal legend at the bottom identifies each genre.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large (64px), subtitle (36px), and axis labels (28px) are
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; month labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stream layers are well-sized with good fill opacity (0.85), smooth
+          curves visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette avoiding red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good layout but could use slightly more vertical space for the chart
+          area; margins are generous
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Y-axis is intentionally hidden for streamgraphs, but x-axis lacks
+          descriptive title (acceptable for this plot type, but no units shown)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-positioned at bottom, no distracting grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph with centered baseline and flowing curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, categories stacked vertically, values represented
+          by area height
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth interpolation, centered baseline, distinct colors, legend
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate time range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 genres
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows multiple trends: growing (Hip-Hop), declining (Rock), seasonal
+          (Pop, Electronic), stable (Jazz)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming by genre is a perfect real-world application matching
+          spec examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in 1000-4000 streaming hours range are plausible, though the
+          absolute scale isn't shown
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'No functions/classes, linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Missing: uses container.screenshot() but plot.html is also created
+          (minor)'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts StreamGraphSeries, proper chart type, but doesn't
+          leverage advanced interactivity in the static output
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/letsplot.yaml b/plots/streamgraph-basic/metadata/letsplot.yaml
index a009aa1631..57fa8b05df 100644
--- a/plots/streamgraph-basic/metadata/letsplot.yaml
+++ b/plots/streamgraph-basic/metadata/letsplot.yaml
@@ -25,3 +25,177 @@ review:
   - Curves are linear/segmented rather than smooth spline interpolation as specified
   - X-axis grid lines could be more subtle (currently dashed at 0.5 alpha)
   - Y-axis label shows Streaming Hours without units like hrs/month
+  image_description: 'The plot displays a streamgraph showing music genre streaming
+    hours over a two-year period (Jan ''23 to Dec ''24). Five genres are shown: Pop
+    (dark blue at bottom), Rock (yellow), Hip-Hop (cyan/light blue), Electronic (purple),
+    and Jazz (orange at top). The chart uses a symmetric centered baseline creating
+    a river-like flowing appearance. The x-axis shows months with labels at Jan ''23,
+    Jul ''23, Jan ''24, Jul ''24, and Dec ''24. The y-axis is labeled "Streaming Hours"
+    but has no tick marks or values (appropriately hidden for streamgraphs). A legend
+    on the right identifies all genres. The title follows the required format: "streamgraph-basic
+    · lets-plot · pyplots.ai". The colors are distinct and harmonious, with smooth
+    flowing curves showing seasonal variations and trends.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick text at 16pt, legend text
+          at 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Smooth ribbon areas are well-sized, alpha=0.9 provides good visibility
+          while allowing slight transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, but yellow and cyan could be slightly challenging
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, legend positioned appropriately on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis says "Streaming Hours" but no units; X-axis says "Month" which
+          is appropriate
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Y-axis grid intentionally removed (good for streamgraph), but x-axis
+          grid is dashed and slightly too prominent at alpha=0.5
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph with centered baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, categories stacked correctly, values represented
+          by area
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Centered baseline, flowing curves, distinct colors, legend included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 24 months displayed, all 5 genres visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 genres
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"streamgraph-basic · lets-plot · pyplots.ai" follows required format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows seasonal patterns, trend variations, different genre behaviors
+          - but curves are not smooth/spline interpolated as spec suggests
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming hours by genre is a perfect, comprehensible real-world
+          scenario matching spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values around 25-150 hours/month are plausible but could be more
+          precisely contextualized
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple script: imports → data → plot → save, no functions or classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses wildcard import with noqa comments, but necessary for lets-plot
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (aes, geom_ribbon, scale_fill_manual, theme_minimal),
+          but geom_ribbon is basic rather than using lets-plot specific advanced features
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/matplotlib.yaml b/plots/streamgraph-basic/metadata/matplotlib.yaml
index 9d17716a4a..f1dde29a4d 100644
--- a/plots/streamgraph-basic/metadata/matplotlib.yaml
+++ b/plots/streamgraph-basic/metadata/matplotlib.yaml
@@ -29,3 +29,181 @@ review:
   - X-axis label Month is minimal - could include the date range for context
   - Streams could show more dramatic variation to better demonstrate the visual power
     of streamgraphs
+  image_description: 'The plot displays a streamgraph showing monthly streaming hours
+    by music genre over two years (Jan''23 to Dec''24). Six colored stacked areas
+    represent different genres: Pop (Python blue #306998), Rock (yellow #FFD43B),
+    Hip-Hop (salmon/coral #E07A5F), Electronic (teal/mint #81B29A), Jazz (tan/beige
+    #F2CC8F), and Classical (dark gray #3D405B). The streams are centered around an
+    invisible baseline creating a symmetric, flowing river-like appearance. The x-axis
+    shows months with quarterly labels, and the title "streamgraph-basic · matplotlib
+    · pyplots.ai" is prominently displayed at the top. A legend in the upper-left
+    corner identifies each genre. The y-axis ticks are intentionally removed for cleaner
+    aesthetics.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, x-label at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, quarterly x-axis labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stream areas are well-sized with appropriate alpha (0.85), all layers
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Six distinct, harmonious colors that are colorblind-safe (no red-green
+          confusion)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, but legend position in
+          upper-left slightly overlaps the first data point area
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Month" label is descriptive but lacks context (no units needed
+          for time, but could say "Month (2023-2024)")'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid (appropriate for streamgraph), legend well-placed with good
+          framealpha, but could be outside the plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph using stackplot with "wiggle" baseline for symmetric
+          centering
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, stacked categories correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth curves, centered baseline, distinct colors, legend present
+          - all spec requirements met
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24 months visible, all data within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 6 genre labels correctly identified
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "streamgraph-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple genres with different trends (growing Hip-Hop, seasonal
+          Electronic, steady Pop, declining Jazz), demonstrates the streamgraph concept
+          well, but could show more dramatic variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming hours by genre is a perfect, relatable real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in reasonable range (5-60+ streaming hours), though the exact
+          scale is hidden
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set for deterministic data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses stackplot with baseline="wiggle" which is matplotlib's native
+          way to create streamgraphs, appropriate use of spines removal for clean
+          aesthetics
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/plotly.yaml b/plots/streamgraph-basic/metadata/plotly.yaml
index b4e53651ba..a4beb4913b 100644
--- a/plots/streamgraph-basic/metadata/plotly.yaml
+++ b/plots/streamgraph-basic/metadata/plotly.yaml
@@ -24,3 +24,175 @@ review:
   weaknesses:
   - Y-axis label Streaming Hours (Millions) is slightly confusing when the centered
     baseline creates negative values
+  image_description: 'The plot displays a streamgraph showing monthly streaming hours
+    by music genre (Pop, Rock, Hip-Hop, Electronic, Jazz, Classical) over a 2-year
+    period from early 2022 to late 2023. The streamgraph uses a centered baseline
+    creating a symmetric, river-like appearance around the x-axis. Colors used are:
+    Python Blue (#306998) for Pop at the bottom, Yellow (#FFD43B) for Rock, Red/Orange
+    (#E24A33) for Hip-Hop, Green (#8EBA42) for Electronic, Purple (#988ED5) for Jazz,
+    and Light Blue (#348ABD) for Classical at the top. The title "streamgraph-basic
+    · plotly · pyplots.ai" is centered at the top. The x-axis shows "Month" with quarterly
+    date labels, and the y-axis shows "Streaming Hours (Millions)" ranging from -100
+    to 100. A horizontal legend is positioned above the plot area. The curves are
+    smooth and flowing, with subtle grid lines in the background.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, ticks at 22pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, legend is well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stream layers are clearly visible with good distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Python Blue/Yellow with colorblind-safe palette, good contrast
+          between adjacent areas
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis has units "(Millions)" but could be clearer (streaming hours
+          is somewhat abstract for negative values)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.2), legend well positioned above plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph with centered baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X, categories stacked, values determine area height
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth curves (spline), centered baseline, distinct colors, legend
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All 6 genres correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "streamgraph-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows variation over time, different category sizes, seasonal patterns
+          - minor: all categories follow similar trends'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming by genre is a plausible, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: false
+        comment: Values are reasonable but "millions" unit on y-axis is somewhat arbitrary
+          for example data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses plotly's spline smoothing, interactive hover, HTML export, go.Scatter
+          with fill="toself"
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/plotnine.yaml b/plots/streamgraph-basic/metadata/plotnine.yaml
index 1c48787317..b645fafc1a 100644
--- a/plots/streamgraph-basic/metadata/plotnine.yaml
+++ b/plots/streamgraph-basic/metadata/plotnine.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/streamgraph-basic/metadata/seaborn.yaml b/plots/streamgraph-basic/metadata/seaborn.yaml
index 334acb46ef..b114711da7 100644
--- a/plots/streamgraph-basic/metadata/seaborn.yaml
+++ b/plots/streamgraph-basic/metadata/seaborn.yaml
@@ -25,3 +25,176 @@ review:
     (acceptable since seaborn lacks native streamgraph support)
   - Missing smooth interpolation/spline curves - uses linear interpolation between
     points rather than flowing curves as suggested in spec notes
+  image_description: 'The plot displays a streamgraph showing music streaming hours
+    by genre (Pop, Rock, Hip-Hop, Electronic, Classical, Jazz) over 24 months from
+    2023-04 to 2024-10. The chart has a symmetric, centered baseline creating a river-like
+    appearance. Colors used are: Python blue (#306998) for Pop at the bottom, Python
+    yellow (#FFD43B) for Rock, light blue for Hip-Hop, orange for Electronic, teal/green
+    for Classical, and darker orange for Jazz at the top. The title reads "streamgraph-basic
+    · seaborn · pyplots.ai" centered at the top. The x-axis shows "Month" with date
+    labels. Y-axis is hidden. A legend in the upper left identifies each genre. The
+    plot fills the canvas well with balanced proportions.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, xlabel at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, x-axis labels rotated and spaced well
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stream layers are clearly visible with good alpha (0.85) and white
+          edge lines for separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Python colors plus colorblind palette, good contrast between
+          adjacent layers
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: X-axis has "Month" label but no units; Y-axis intentionally hidden
+          for streamgraph
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: false
+        comment: Subtle dashed grid on x-axis only (alpha 0.3), legend well-placed
+          with title
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamgraph with centered baseline
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, categories stacked, values shown as area
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Centered baseline, distinct colors, legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 24-month range displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 6 genres
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Title uses middle dot (·) but format is correct: "streamgraph-basic
+          · seaborn · pyplots.ai" ✓ (Actually correct, giving full points)'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 6 categories with varying magnitudes, seasonal patterns, and
+          trends over time
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Music streaming hours by genre is a real, comprehensible scenario
+          matching the spec example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Hours values (5-50+ range) are realistic for monthly streaming
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.set_theme, sns.despine, and seaborn color palettes, but
+          the core plotting is done with matplotlib's fill_between rather than a native
+          seaborn plotting function
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/altair.yaml b/plots/streamline-basic/metadata/altair.yaml
index 310252280d..dd96c2b749 100644
--- a/plots/streamline-basic/metadata/altair.yaml
+++ b/plots/streamline-basic/metadata/altair.yaml
@@ -26,3 +26,181 @@ review:
   - Some inner streamlines appear slightly crowded/overlapping near the center
   - Does not leverage Altair interactive features (tooltips, zoom/pan) which would
     enhance flow exploration
+  image_description: The plot displays a vortex flow field visualized as concentric
+    elliptical/circular streamlines. The plot uses the viridis color scheme, with
+    purple/dark colors at the center (lower flow speed ~0.5-1.0) transitioning to
+    green and yellow at the outer rings (higher flow speed ~2.5-3.0). The title reads
+    "streamline-basic · altair · pyplots.ai" at the top center. The X-axis is labeled
+    "X Position" ranging from -3.4 to 3.4, and the Y-axis is labeled "Y Position"
+    with the same range. A "Flow Speed" color legend appears in the upper right corner.
+    The grid is subtle with light gray lines. Multiple concentric streamlines spiral
+    outward from the center, showing the characteristic pattern of a circular vortex
+    flow where u = -y, v = x.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, tick labels at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Streamlines are clearly visible with good stroke width (2.5) and
+          opacity (0.85). Minor deduction: some inner streamlines appear slightly
+          crowded'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good canvas utilization, plot fills most of the space. Minor deduction:
+          slight asymmetry in margins'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels say "X Position" and "Y Position" but lack units (specification
+          mentions "units" in axis labels would be appropriate)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Grid is subtle (alpha 0.4), legend is well-placed. Minor: legend
+          could be slightly larger'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline plot showing continuous flow paths
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y positions correctly mapped, velocity encoded as color
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows vortex flow (u=-y, v=x), color encodes velocity magnitude,
+          proper density
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range from -3.5 to 3.5
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Flow Speed" legend accurately represents velocity magnitude'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "streamline-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows circular vortex pattern well, demonstrates flow topology.
+          Minor: could show more varied radii for better coverage'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Circular vortex is a classic fluid dynamics example, neutral and
+          educational
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Values are appropriate for dimensionless vortex flow. Minor: could
+          have more realistic physical units'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Flat structure with no functions/classes, follows imports → data
+          → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also saves plot.html (extra output, though
+          not incorrect)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative encoding (detail, order, color scales)
+          appropriately. However, doesn't leverage Altair's interactive features which
+          could enhance streamline exploration
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/bokeh.yaml b/plots/streamline-basic/metadata/bokeh.yaml
index ee2b5fa6dc..626ebf5cdc 100644
--- a/plots/streamline-basic/metadata/bokeh.yaml
+++ b/plots/streamline-basic/metadata/bokeh.yaml
@@ -28,3 +28,179 @@ review:
   - Does not leverage Bokeh's ColumnDataSource for data organization
   - No interactive features (hover tooltips showing velocity magnitude would enhance
     the Bokeh implementation)
+  image_description: The plot displays a vortex flow field visualized as streamlines
+    arranged in concentric circular patterns around the origin. The streamlines use
+    a gradient color scheme that transitions from teal/gray-green colors in the center
+    (representing lower velocity magnitude) to golden-yellow colors at the outer edges
+    (representing higher velocity magnitude). Each streamline terminates with an arrowhead
+    indicating the flow direction (counterclockwise rotation). The background is a
+    light off-white (#fafafa). The title "streamline-basic · bokeh · pyplots.ai" appears
+    in the upper-left corner, with "X Position" and "Y Position" axis labels. Axis
+    tick marks range from approximately -3 to 3. A subtle dashed grid is visible in
+    the background.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title and axis labels are readable at 32pt and 26pt respectively;
+          tick labels at 20pt are clear. Minor: title could be slightly more prominent'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Streamlines are well-sized (line_width=4) with good alpha (0.85);
+          arrowheads clearly show direction. Minor density issue: some inner streamlines
+          overlap slightly'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-green to yellow gradient is colorblind-safe (viridis-like progression)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; streamlines extend
+          nicely to edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but no units specified
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3 and dashed styling; no legend needed
+          for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline plot type showing vector field with continuous
+          curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid and U/V velocity components correctly implemented
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Vortex flow field (u=-y, v=x), color encoding magnitude, arrowheads
+          for direction
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range (-3.5 to 3.5) with good padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color meaning is intuitive
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "streamline-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows circular streamlines well with velocity magnitude coloring.
+          Could show more varied flow patterns (saddle points, sources/sinks)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Vortex flow is a classic fluid dynamics scenario; appropriate for
+          demonstrating streamlines
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Grid size (-3 to 3) and 40x40 resolution are reasonable; magnitude
+          scaling could be slightly adjusted
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear structure with imports → data → plot → save; no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) at the start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, bokeh, scipy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses bokeh's figure, line, patch methods and export_png/save. Could
+          leverage ColumnDataSource more explicitly or add interactivity features
+          like hover tools
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/highcharts.yaml b/plots/streamline-basic/metadata/highcharts.yaml
index e0b77395e4..d0a90e6a28 100644
--- a/plots/streamline-basic/metadata/highcharts.yaml
+++ b/plots/streamline-basic/metadata/highcharts.yaml
@@ -25,3 +25,180 @@ review:
     would be more elegant
   - Grid/Legend scoring affected by custom HTML approach rather than native Highcharts
     legend
+  image_description: 'The plot displays a vortex flow field visualization with concentric
+    circular/spiral streamlines centered around the origin. The streamlines flow counterclockwise
+    as expected from the u = -y, v = x vector field equations. Colors range from dark
+    purple (inner circles, low velocity magnitude ~1.0) through blue and teal (medium
+    velocities) to bright green (outer circles, high velocity magnitude ~3.0). The
+    title "streamline-basic · highcharts · pyplots.ai" is clearly displayed at the
+    top, with a subtitle explaining the vortex formula. Axes are labeled "X Position
+    (arbitrary units)" and "Y Position (arbitrary units)" with clear gridlines. A
+    velocity magnitude legend appears in the upper right showing four color categories:
+    High (3.0), Med-High, Med-Low, and Low (1.0).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and tick labels are all clearly readable
+          at the 4800x2700 resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; streamlines are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Streamlines are clearly visible with appropriate line width; slight
+          deduction for some inner streamlines being slightly crowded
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis-inspired palette (purple → blue → teal → green) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; legend placement is functional though the
+          plot area could fill more of the canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units "(arbitrary units)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle; however, legend is HTML overlay and renders reasonably
+          but not integrated into the Highcharts chart
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline visualization showing flow field
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y coordinates correctly mapped; velocity field u=-y, v=x properly
+          implemented
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows streamlines with color encoding for velocity magnitude; starting
+          points distributed across radii; slight deduction as line width does not
+          vary with field strength (optional per spec)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes range from -4 to 4, showing all streamlines fully
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend shows velocity magnitude categories but uses discrete bins
+          rather than a continuous scale
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "streamline-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows circular flow pattern of vortex field; demonstrates multiple
+          radii and the concentric nature of streamlines; minor deduction as the vortex
+          center is not as visually emphasized
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Vortex flow is a classic physics example used in fluid dynamics education
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid and velocity values are sensible for a mathematical vortex field
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally follows imports → data → plot → save pattern, but code
+          is moderately complex due to manual streamline integration
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts, selenium, etc.)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts Python API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses LineSeries, custom HTML legend, proper Highcharts options configuration;
+          however, does not leverage more advanced Highcharts features like built-in
+          colorAxis or streaming data capabilities
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/letsplot.yaml b/plots/streamline-basic/metadata/letsplot.yaml
index 08a3eb99cd..6f30aeccb0 100644
--- a/plots/streamline-basic/metadata/letsplot.yaml
+++ b/plots/streamline-basic/metadata/letsplot.yaml
@@ -25,3 +25,171 @@ review:
     or use lambda)
   - Title format adds descriptive prefix instead of using exact spec-id format
   - Could benefit from arrow indicators to show flow direction along streamlines
+  image_description: The plot displays a vortex flow field with 8 concentric circular
+    streamlines centered at the origin. The color gradient transitions from dark blue
+    (inner, low field strength ~0.5) through gray to golden yellow (outer, high field
+    strength ~2.5). The streamlines are smooth, closed circles representing the rotational
+    vector field u=-y, v=x. The title "Vortex Flow Field · streamline-basic · letsplot
+    · pyplots.ai" is displayed at the top. Axes range from -3 to 3 on both X Position
+    and Y Position. A colorbar legend on the right shows "Field Strength" values.
+    The plot uses a minimal theme with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Streamlines are clearly visible with good line width, though the
+          innermost circle is slightly thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow gradient is colorblind-safe (viridis-like)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight imbalance with legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend well placed but title "Field Strength" could
+          be more specific
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline plot showing vector field flow
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped, velocity field properly integrated
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows streamlines with color encoding magnitude; missing line width
+          variation mentioned in spec notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All streamlines fully visible within bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labeled "Field Strength"
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Includes spec-id, library, pyplots.ai but adds "Vortex Flow Field"
+          prefix (minor variation)
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple radii well, but all streamlines are perfect circles
+          (could show more complex field features)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Vortex/rotation field is a classic physics example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values -3 to 3 and field strengths 0.5-2.5 are sensible
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper function `velocity_field` which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar, geom_path, scale_color_gradient, theme_minimal
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/matplotlib.yaml b/plots/streamline-basic/metadata/matplotlib.yaml
index 9e5f503d0b..ba3a6edf78 100644
--- a/plots/streamline-basic/metadata/matplotlib.yaml
+++ b/plots/streamline-basic/metadata/matplotlib.yaml
@@ -24,3 +24,175 @@ review:
   - Grid linestyle dashed is slightly more visible than needed
   - Could use linewidth varying with speed for additional visual encoding
   - Flow field could be more complex (e.g., dipole) to show more features
+  image_description: The plot displays a basic streamline visualization showing a
+    vortex flow field. The streamlines form concentric circular patterns spiraling
+    around a central point (origin). Colors range from deep purple/blue at the center
+    (low velocity ~0.5) to bright yellow-green at the outer edges (high velocity ~4.0),
+    using the viridis colormap. The plot has a title "streamline-basic · matplotlib
+    · pyplots.ai" at the top. Axis labels show "X Position" and "Y Position" ranging
+    from -3 to 3. A vertical colorbar on the right indicates "Velocity Magnitude".
+    The background includes a subtle dashed grid. Arrows along the streamlines indicate
+    flow direction (counterclockwise).
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Streamlines at linewidth 2.5 are clearly visible, arrow size appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions with equal aspect, slight issue with plot being
+          somewhat compact due to colorbar
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("X Position", "Y Position") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha 0.3 is subtle, colorbar well placed; however dashed linestyle
+          slightly distracting
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline/streamplot visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid with U/V velocity components correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes color encoding of velocity magnitude, proper density, flow
+          direction arrows
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full -3 to 3 range visible on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Velocity Magnitude"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exactly matches required format: "streamline-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows circular flow pattern, velocity variation, and direction.
+          Minor: could show more interesting topology (saddle points, sources)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Vortex flow is a classic physics scenario (fluid dynamics, electromagnetic
+          fields)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Velocity magnitude 0.5-4.0 is reasonable; grid range -3 to 3 appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed for this deterministic
+          flow)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ax.streamplot which is matplotlib's native streamline function
+          with color mapping and arrows; could use additional features like varying
+          linewidth with speed
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/plotnine.yaml b/plots/streamline-basic/metadata/plotnine.yaml
index 7087177337..55de475749 100644
--- a/plots/streamline-basic/metadata/plotnine.yaml
+++ b/plots/streamline-basic/metadata/plotnine.yaml
@@ -26,3 +26,172 @@ review:
   weaknesses:
   - Axis labels could include units or more descriptive context (e.g., meters, arbitrary
     units)
+  image_description: The plot displays a vortex flow field visualization with concentric
+    circular streamlines centered at the origin. The streamlines form nested circles
+    ranging from radius ~0.5 to ~3, demonstrating counter-clockwise rotation as indicated
+    by small arrow markers along each streamline. Color encoding transitions from
+    blue (low speed ~1) at the center to yellow (high speed ~3) at outer radii, matching
+    the vortex velocity relationship (speed = radius). The plot uses a square 1:1
+    canvas with axes ranging from -3 to 3 on both X and Y. The title "streamline-basic
+    · plotnine · pyplots.ai" appears at the top in bold. Axis labels show "X Position"
+    and "Y Position". A color bar legend labeled "Flow Speed" appears on the right
+    side. The background is white with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick labels 16pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Streamlines sized appropriately at 1.2 width, alpha 0.8 provides
+          good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-to-yellow (viridis-like) gradient is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 1:1 aspect ratio perfectly suits circular streamlines, good
+          canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Labels are descriptive but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid, well-placed legend on right
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline visualization using continuous paths
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid with u=-y, v=x vortex field correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Smooth streamlines, color-coded velocity, directional indicators
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full -3 to 3 range visible on both axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Flow Speed" legend correctly describes color mapping'
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "streamline-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows full vortex topology with concentric circles, varying speeds
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Vortex flow is plausible physics scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values -3 to 3 are sensible for a normalized vector field
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses solve_ivp with lambda for integration (borderline complexity
+          but acceptable)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses ggplot grammar with geom_path and geom_point, but scipy handles
+          the actual streamline computation rather than plotnine-native features
+  verdict: APPROVED
diff --git a/plots/streamline-basic/metadata/seaborn.yaml b/plots/streamline-basic/metadata/seaborn.yaml
index 21d1628d9d..78dbb9d98d 100644
--- a/plots/streamline-basic/metadata/seaborn.yaml
+++ b/plots/streamline-basic/metadata/seaborn.yaml
@@ -23,3 +23,171 @@ review:
   weaknesses:
   - Colorbar label missing units in rendered output
   - Axis labels in rendered image missing units despite being in code
+  image_description: 'The plot shows a series of concentric circular streamlines visualizing
+    a vortex flow field. The streamlines are colored using the viridis colormap, ranging
+    from dark purple (inner rings, ~0.8 m/s) through blues and greens (middle rings)
+    to yellow (outer rings, ~3.0 m/s). The plot has a square 1:1 aspect ratio with
+    axes ranging from -3 to 3 on both X and Y. Axis labels show "X Position" and "Y
+    Position" (missing units on axes). A colorbar on the right indicates "Flow Speed"
+    (also missing units). The title correctly follows the required format: "streamline-basic
+    · seaborn · pyplots.ai". The grid is subtle (dashed, low alpha). Small arrowheads
+    are visible on the streamlines indicating counter-clockwise flow direction.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or visual elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Streamlines are clearly visible with good line width and alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square aspect ratio fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Axis labels say "X Position" and "Y Position" but code has units
+          in label ("X Position (m)"), yet rendered image shows labels WITHOUT units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha=0.3, colorbar well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct streamline visualization of vector field
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y grid with velocity components correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Streamlines with density control, color encoding velocity, direction
+          arrows
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within -3.5 to 3.5 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: streamline-basic · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows vortex flow with circular streamlines at multiple radii
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: false
+        comment: Vortex flow is a plausible physics scenario; could be stronger with
+          domain context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible for a normalized flow field
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports used; pandas import used for DataFrame
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot with hue/units, sns.set_theme, sns.despine, sns.color_palette
+          - good usage but streamlines aren't a native seaborn plot type, mostly relies
+          on matplotlib FancyArrowPatch for arrows
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/altair.yaml b/plots/strip-basic/metadata/altair.yaml
index 6728ed0805..ffad8f81d3 100644
--- a/plots/strip-basic/metadata/altair.yaml
+++ b/plots/strip-basic/metadata/altair.yaml
@@ -23,3 +23,172 @@ review:
   - X-axis category labels could benefit from horizontal orientation for better readability
   - Y-axis scale extends significantly below data range (down to 10 when minimum data
     is ~35)
+  image_description: The plot displays a strip plot with four departments (Engineering,
+    Marketing, Sales, Support) along the x-axis. Individual data points are shown
+    as semi-transparent blue circles (#306998) with horizontal jitter applied to reveal
+    distribution density. Yellow horizontal tick marks indicate the group means for
+    each department. The y-axis shows "Response Score" ranging from 10 to 110, with
+    the actual data spanning approximately 35-100. The title "strip-basic · altair
+    · pyplots.ai" appears at the top. A subtle dashed grid aids readability. X-axis
+    category labels are rotated at an angle.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels slightly
+          small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Circle markers with opacity 0.6 work well for ~40 points per category;
+          slightly large but appropriate
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with yellow mean markers; colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but missing units (scores could have "points"
+          or similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines at 0.3 opacity; no legend needed
+          but mean markers lack explanation
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, continuous values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter, transparency, and reference lines for means all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Mean markers present but no legend explaining them
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "strip-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions across departments with varying spreads;
+          Support has wider spread as intended
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey response scores by department is a plausible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores in 20-100 range are realistic for survey data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear structure with imports, data, plot, save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but uses default filename in transform_calculate
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses transform_calculate for jitter and layering for mean lines;
+          good use of Altair's declarative grammar but could use more interactive
+          features
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/bokeh.yaml b/plots/strip-basic/metadata/bokeh.yaml
index 61048a0e52..bded49f668 100644
--- a/plots/strip-basic/metadata/bokeh.yaml
+++ b/plots/strip-basic/metadata/bokeh.yaml
@@ -22,3 +22,170 @@ review:
   weaknesses:
   - Y-axis label could include units or scale context (e.g., Survey Score (1-10))
   - No legend explaining what the horizontal lines represent (group means)
+  image_description: 'The plot displays a strip plot showing survey scores (y-axis,
+    range 0-11) across four departments (x-axis: Engineering, Marketing, Sales, HR).
+    Each department has its own distinct color: Engineering in blue (#306998), Marketing
+    in yellow (#FFD43B), Sales in green (#4CAF50), and HR in pink (#E91E63). Individual
+    data points are scattered with horizontal jitter within each category to show
+    distribution. Black horizontal lines indicate the group mean for each department.
+    The background is a light gray (#fafafa), with subtle dashed horizontal grid lines.
+    The title "strip-basic · bokeh · pyplots.ai" appears at the top left. Points have
+    white borders and moderate transparency (alpha 0.6).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 42pt, axis labels at 32pt, tick labels at 26pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, category labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size 28 with alpha 0.6 is well suited for ~165 total points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct hues (blue, yellow, green, pink) are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Descriptive labels but missing units (should be "Survey Score (1-10)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha 0.3, dashed), but no legend for mean lines
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot with jittered points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied (0.25 width), transparency for overlap, mean reference
+          lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-11 shows all data points (scores 1-10)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colors match categories correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "strip-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varying distributions: tight (HR), spread (Marketing with
+          outliers), different means'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Survey response scores by department is plausible, though generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 1-10 survey scale with realistic variation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: ColumnDataSource, figure with proper sizing, export_png, scatter
+          with styling options, ticker overrides for categorical axis
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/highcharts.yaml b/plots/strip-basic/metadata/highcharts.yaml
index f4ac7a4f7d..4db5b1dbc8 100644
--- a/plots/strip-basic/metadata/highcharts.yaml
+++ b/plots/strip-basic/metadata/highcharts.yaml
@@ -26,3 +26,181 @@ review:
   - Some point overlap in dense regions could benefit from slightly smaller markers
     or more transparency
   - Legend positioned far from the data in top-right corner
+  image_description: 'The plot displays a strip plot with four categories (Mathematics,
+    Science, Literature, History) arranged horizontally along the x-axis. Each category
+    shows individual data points (student test scores) scattered vertically with random
+    horizontal jitter. The colors used are: blue (#306998) for Mathematics, yellow
+    (#FFD43B) for Science, purple (#9467BD) for Literature, and cyan (#17BECF) for
+    History. Red diamond markers indicate the mean for each category. The title "strip-basic
+    · highcharts · pyplots.ai" appears at the top in bold, with a subtitle "Student
+    Test Scores by Subject". The y-axis shows "Test Score" ranging from 40 to 106,
+    and the x-axis shows "Subject". A legend is positioned in the top-right corner
+    listing all categories plus the Mean marker. The background is white with subtle
+    dashed gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, axis labels and tick marks are clearly readable
+          at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized with transparency, though some overlap
+          occurs in dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan) - no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout overall, but legend in top-right corner creates slight
+          imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Test Score", "Subject") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines, legend well-placed but slightly
+          far from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot using scatter with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied, transparency for overlapping points, mean markers
+          included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (40-106) shows all data points
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four categories plus mean
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions: Math has bimodal pattern, Literature
+          shows bimodal clustering, Science/History more normal. Could show more extreme
+          outliers.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Student test scores by subject is a very realistic and relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores range 48-97 which is realistic, though clipping at 30-100
+          is appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct), but uses container.screenshot
+          instead of driver.save_screenshot which is fine
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts scatter series with custom markers, tooltips, and
+          interactive HTML export. Could leverage more Highcharts-specific features
+          like data labels or animations.
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/letsplot.yaml b/plots/strip-basic/metadata/letsplot.yaml
index 545a3fa2ea..0e823f54a0 100644
--- a/plots/strip-basic/metadata/letsplot.yaml
+++ b/plots/strip-basic/metadata/letsplot.yaml
@@ -22,3 +22,149 @@ review:
   weaknesses:
   - 'No legend present (minor: single-color plot does not strictly require one)'
   - Point size could be slightly larger for better visibility at full 4800x2700 resolution
+  image_description: 'The plot displays a basic strip plot showing survey scores (40-100
+    points) across four departments: Marketing, Engineering, Sales, and Support. Points
+    are rendered in a muted blue color (#306998) with horizontal jitter (width ~0.25)
+    and alpha transparency (0.6) to reduce overplotting. The title "strip-basic ·
+    letsplot · pyplots.ai" appears at the top. The plot uses a clean minimal theme
+    with subtle horizontal gray grid lines and no vertical grid lines. Each department
+    shows distinct distribution patterns: Engineering clusters higher (80-90), Sales
+    shows wide spread with outliers near 40, Marketing has moderate spread around
+    70-80, and Support ranges from 50-95.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text perfectly readable at full size with appropriate font sizing
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Points well-sized with good alpha, minor: could be slightly larger'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "Survey Score (points)", X-axis categorical label
+          acceptable
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (not strictly needed for single-color plot)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot using geom_jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical X, continuous Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter, transparency, individual points all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 40-100 range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single color, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: strip-basic · letsplot · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions, spreads, and outliers across departments
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey scores by department is a real, comprehensible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Scores 40-100 are realistic for survey data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed seed in both np.random and geom_jitter
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: ggplot grammar, geom_jitter with seed, fine-grained theme control
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/matplotlib.yaml b/plots/strip-basic/metadata/matplotlib.yaml
index a345eef5ff..8ecdc14d45 100644
--- a/plots/strip-basic/metadata/matplotlib.yaml
+++ b/plots/strip-basic/metadata/matplotlib.yaml
@@ -25,3 +25,179 @@ review:
     single color or legend
   - Mean lines lack annotation or legend entry explaining what they represent
   - Y-axis label could include scale range (1-10)
+  image_description: 'The plot displays a strip plot showing "Survey Response Score"
+    (y-axis, scale 0-10) across four departments (x-axis): Engineering, Marketing,
+    Sales, and HR. Each department''s data points are scattered with horizontal jitter
+    to avoid overplotting. Engineering and Sales use a blue color (#306998), while
+    Marketing and HR use a yellow/gold color (#FFD43B). Black horizontal lines indicate
+    the mean score for each group (Engineering ~6.9, Marketing ~6.6, Sales ~7.7, HR
+    ~6.2). Points have white edge borders and moderate transparency (alpha 0.6). The
+    y-axis grid lines are subtle with dashed styling. The title follows the correct
+    format: "strip-basic · matplotlib · pyplots.ai". The layout is clean with good
+    use of the 16:9 canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap, categories well-spaced, points jittered appropriately
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Marker size (s=200) is good for the data density (35-52 points per
+          group), alpha 0.6 works well
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow are distinguishable but alternating colors within
+          adjacent categories could be confusing (no legend explains the color meaning)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, balanced margins, plot fills appropriate
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Department", "Survey Response Score") but no
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3, dashed), y-axis only - good. No legend
+          present but mean lines lack explanation
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot with jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, continuous values on Y - correct
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter (0.2 spread), transparency (0.6), horizontal mean lines as
+          reference
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-11 shows all data points (scores range ~2-10)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for this simple plot
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "strip-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions per group (varying means, spreads),
+          good variety. HR shows widest spread, Sales shows tightest - demonstrates
+          spec's point about seeing individual values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey response scores by department is a realistic, comprehensible
+          scenario mentioned in spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1-10 survey scale is appropriate, sample sizes (35-52) are within
+          spec recommendation (10-200)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Imports → Data → Plot → Save, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern matplotlib API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses ax.scatter() with jitter and ax.hlines() for means. Basic matplotlib
+          usage, no advanced features like zorder customization or custom markers
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/plotly.yaml b/plots/strip-basic/metadata/plotly.yaml
index 7474e4e9e3..dfbfcc8015 100644
--- a/plots/strip-basic/metadata/plotly.yaml
+++ b/plots/strip-basic/metadata/plotly.yaml
@@ -26,3 +26,174 @@ review:
     value annotations
   - Does not use plotly.express px.strip() which would provide native strip plot functionality
     with automatic jitter
+  image_description: The plot displays a strip plot with four categories (Group A,
+    Group B, Group C, Group D) on the x-axis and "Response Score" on the y-axis ranging
+    from approximately 30 to 110. Each group shows individual data points with horizontal
+    jitter applied. Groups A and C use blue color (#306998), while Groups B and D
+    use yellow (#FFD43B). Black horizontal lines indicate the mean for each group.
+    The title "strip-basic · plotly · pyplots.ai" is centered at the top. A legend
+    on the right side identifies each group. The background uses the plotly_white
+    template with subtle horizontal grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title (size 32), axis labels (size 24), and tick labels (size 20/18)
+          are all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; jitter prevents most point overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers (size 14) with opacity 0.6 are well-sized for ~50-60 points
+          per group; slight reduction for minor overlap in dense areas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) have good contrast, but using
+          only 2 colors for 4 groups reduces distinction between alternating groups
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Category", "Response Score") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.1), legend is well-placed; however, legend
+          duplicates x-axis labels which is redundant
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot with individual points and jitter
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, continuous values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter (0.2 width), transparency (0.6), mean reference lines as suggested
+          in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis range appropriate
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match data groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "strip-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distributions (varying means, spreads), demonstrates
+          density through jitter
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey response scores grouped by demographic category - realistic
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response scores in 30-110 range are realistic for surveys
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` used'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, plotly.graph_objects)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implementation uses go.Scatter with manual jitter instead of leveraging
+          plotly.express's built-in strip plot (px.strip) which handles jitter automatically
+          and provides better hover interactions
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/plotnine.yaml b/plots/strip-basic/metadata/plotnine.yaml
index 492a941937..77879ae5b4 100644
--- a/plots/strip-basic/metadata/plotnine.yaml
+++ b/plots/strip-basic/metadata/plotnine.yaml
@@ -25,3 +25,179 @@ review:
     confusion - a single color or sequential palette would be clearer
   - Grid lines are extremely subtle, almost invisible - could be slightly more visible
     (alpha 0.2-0.3)
+  image_description: 'The plot displays a basic strip plot showing patient response
+    times (seconds) across four treatment groups: Drug A, Drug B, Drug C, and Placebo.
+    Points are horizontally jittered within each category to reduce overplotting.
+    The color scheme uses blue (#306998, #4B8BBE) for Drug A and Drug C, and yellow
+    (#FFD43B, #FFE873) for Drug B and Placebo. The y-axis shows "Response Time (seconds)"
+    ranging from approximately 10-65 seconds, while the x-axis shows "Treatment Group"
+    with the four categories. The title "strip-basic · plotnine · pyplots.ai" appears
+    at the top. The plot uses a minimal theme with very subtle grid lines (barely
+    visible). The Placebo group shows the highest and most spread distribution (25-65s),
+    while Drug C shows the lowest and tightest distribution (15-37s), demonstrating
+    different treatment effects.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Point size 4 with alpha 0.65 is appropriate for ~165 points, though
+          slightly small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Yellow and blue are distinguishable, but alternating colors between
+          adjacent categories is confusing (Drug A=blue, Drug B=yellow, Drug C=blue,
+          Placebo=yellow) - semantically inconsistent
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good 16:9 aspect ratio, plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (seconds)" includes units, "Treatment Group" is descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), legend hidden (appropriate since colors don't
+          add semantic meaning)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot with jittered points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, continuous values on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied (width=0.25), transparency (alpha=0.65), individual
+          points visible
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of data (~10-65 seconds)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden since it adds no information
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "strip-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (spread, mean) across groups, demonstrates
+          drug effects, but no extreme outliers shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Patient response times to drug treatments is a real, comprehensible
+          medical research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 10-65 seconds are plausible, and clipping at a 5s
+          minimum is reasonable
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions or classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and position_jitter(random_state=42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with position_jitter, scale_color_manual, theme_minimal,
+          and element_text theming, but nothing particularly advanced beyond basic
+          plotnine usage
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/pygal.yaml b/plots/strip-basic/metadata/pygal.yaml
index 676a6403ea..471020c001 100644
--- a/plots/strip-basic/metadata/pygal.yaml
+++ b/plots/strip-basic/metadata/pygal.yaml
@@ -22,3 +22,180 @@ review:
   - Color scheme uses only 2 distinct colors (blue/yellow) for 4 categories, making
     Engineering/Sales and Marketing/Support visually indistinguishable without legend
   - Axis labels lack units (could be Survey Score 1-10 for clarity)
+  image_description: The plot displays a strip plot with four department categories
+    (Engineering, Marketing, Sales, Support) on the x-axis and Survey Score (ranging
+    from approximately 3.5 to 10) on the y-axis. Points are scattered with horizontal
+    jitter within each category to show distribution density. Engineering and Sales
+    use blue (#306998) colored dots, while Marketing and Support use yellow/gold (#FFD43B)
+    colored dots. The title "strip-basic · pygal · pyplots.ai" is displayed at the
+    top. Y-axis gridlines are visible with subtle dotted lines. A legend at the bottom
+    shows all four categories with their respective colors. The plot has a clean white
+    background with good use of transparency (alpha ~0.65) to handle overlapping points.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable at full resolution.
+          Font sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Category labels are well-spaced, legend
+          is separate from data.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots are appropriately sized (dots_size=12) with good transparency
+          (0.65). Some overlap in dense regions but points remain distinguishable.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow provide good contrast but using only 2 distinct colors
+          for 4 categories reduces clarity (Engineering/Sales both blue, Marketing/Support
+          both yellow).
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate space with balanced
+          margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Department", "Survey Score") but lack units.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis grid is subtle with dotted lines, legend well-placed at bottom.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot implementation using XY scatter with jitter.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, continuous values on y-axis as specified.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied (±0.25), transparency for overlapping points, multiple
+          categories shown.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range of data (approximately 3.5-10).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all four departments.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "strip-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions per category with varying means and
+          spreads. Support shows wider spread, Sales tighter clustering. Could benefit
+          from more visible outliers.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Survey response scores by department is a realistic, relatable scenario
+          mentioned in the spec.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1-10 scale is appropriate for survey scores. 40 points per category
+          is reasonable though some clipping to range limits reduces natural distribution
+          appearance.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (random, pygal, Style).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs both plot.html and plot.png but saves to current directory
+          without path specification (minor).
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart, custom Style, legend_at_bottom, dots_size.
+          Could leverage more pygal-specific features like tooltips or value formatters.
+  verdict: APPROVED
diff --git a/plots/strip-basic/metadata/seaborn.yaml b/plots/strip-basic/metadata/seaborn.yaml
index 6c6172030f..5087ad34d7 100644
--- a/plots/strip-basic/metadata/seaborn.yaml
+++ b/plots/strip-basic/metadata/seaborn.yaml
@@ -22,3 +22,177 @@ review:
   - 'Color palette includes two similar blue shades (Engineering #306998, Sales #4B8BBE)
     which may be hard to distinguish'
   - Axis labels lack units (e.g., Satisfaction Score (%) or Satisfaction Score (0-100))
+  image_description: 'The plot displays a strip plot showing satisfaction scores (y-axis,
+    ranging from ~35 to 105) across four departments (Engineering, Marketing, Sales,
+    HR) on the x-axis. Each department has its own color: Engineering uses blue (#306998),
+    Marketing uses yellow (#FFD43B), Sales uses light blue (#4B8BBE), and HR uses
+    gray (#646464). Individual data points are shown with jitter applied horizontally
+    within each category. Red dashed horizontal lines indicate group means for each
+    department. The title "strip-basic · seaborn · pyplots.ai" appears at the top.
+    A legend in the upper right corner identifies the red dashed line as "Group Mean".
+    The y-axis is labeled "Satisfaction Score" and x-axis is labeled "Department".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, categories well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Marker size=12 with alpha=0.7 is appropriate for 30-40 points per
+          category
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast, but blue/light-blue for Engineering/Sales could be
+          confusing for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but lack units (score could have "points"
+          or "%" clarification)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but only y-axis grid; legend is well
+          placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct strip plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, continuous values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Jitter applied (0.25), transparency (0.7), horizontal mean lines
+          included as suggested
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range (35-105) with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies mean lines
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "strip-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows varied distributions (Engineering tight/high, Marketing wide,
+          Sales bimodal-ish, HR moderate) but the bimodal nature in Sales is not strongly
+          visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction surveys by department is a genuine real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Satisfaction scores 40-100 are plausible; could benefit from clear
+          indication this is out of 100
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports (matplotlib, numpy, pandas, seaborn) are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter and legend=False
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.stripplot correctly with proper hue/palette pattern, but
+          could showcase more seaborn-specific styling like sns.set_context or sns.despine
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/altair.yaml b/plots/subplot-grid-custom/metadata/altair.yaml
index 14a1cd332b..63f7f100d2 100644
--- a/plots/subplot-grid-custom/metadata/altair.yaml
+++ b/plots/subplot-grid-custom/metadata/altair.yaml
@@ -26,3 +26,188 @@ review:
     to colspan
   - Tick labels on x-axis of main chart are dense and could benefit from rotation
     or fewer ticks
+  image_description: 'The plot displays a custom subplot grid layout with 5 different
+    visualizations arranged in 3 rows. The top row shows a large "Daily Price Trend
+    (Main View)" line chart spanning the full width with a blue (#306998) line showing
+    price movements from January to late April 2024. The middle row contains two charts
+    side by side: a yellow bar chart showing "Trading Volume" and a blue histogram
+    showing "Return Distribution" with daily return percentages. The bottom row has
+    a scatter plot titled "Feature Correlation" showing blue points demonstrating
+    positive correlation between Feature A and Feature B, and a donut chart "Category
+    Breakdown" with four colored segments (blue, yellow, light blue, golden). A legend
+    for the category breakdown appears at the top right. The main title "subplot-grid-custom
+    · altair · pyplots.ai" is displayed at the top center. All text is readable, axes
+    are properly labeled with units where appropriate (Price $, Volume, Daily Return
+    %, Feature A/B).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable; title is appropriately sized at 30pt,
+          subplot titles at 18-22pt, axis labels at 14-18pt. Slightly smaller than
+          ideal for some tick labels but still legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and titles are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line chart is well-sized (strokeWidth=4), scatter points are appropriately
+          sized (size=120), histogram and bar charts are clear. Donut chart could
+          be slightly larger.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue (#306998), yellow (#FFD43B), light blue (#4B8BBE) - colorblind-safe
+          palette with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout with clear visual hierarchy. Main chart appropriately
+          larger. Slight imbalance with legend placement far right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Price ($)", "Volume", "Daily Return
+          (%)", "Feature A", "Feature B"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No grid lines (clean look), but legend for category breakdown is
+          placed far from the donut chart
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements custom subplot grid with non-uniform cell sizes
+          (main chart wider)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned for all subplots (time series, histogram,
+          scatter, categorical)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Implements colspan-style layout with main chart spanning full width,
+          multiple plot types, dashboard-style layout. Minor: could show rowspan as
+          well.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly match category names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "subplot-grid-custom · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows diverse plot types (line, bar, histogram, scatter, donut) demonstrating
+          grid flexibility. Could include more variation in grid patterns.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial dashboard scenario with price, volume, returns, correlations,
+          and product breakdown - highly plausible real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price around $75-110 (realistic stock), volume 500-2000 (reasonable),
+          returns -6% to +6% (realistic daily). Correlation features slightly generic.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → charts → layout → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses .properties(title=...) which works but could use more modern
+          patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of Altair''s declarative syntax: vconcat/hconcat for
+          layout composition, resolve_scale for independent axes, mark_arc for donut,
+          comprehensive encoding with explicit type annotations (:Q, :T, :N)'
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/bokeh.yaml b/plots/subplot-grid-custom/metadata/bokeh.yaml
index 7d90c16291..5ecafe26e6 100644
--- a/plots/subplot-grid-custom/metadata/bokeh.yaml
+++ b/plots/subplot-grid-custom/metadata/bokeh.yaml
@@ -28,3 +28,180 @@ review:
     notes
   - Could leverage more Bokeh-specific interactive features like linked brushing or
     hover tooltips
+  image_description: 'The plot displays a dashboard-style custom subplot grid with
+    5 panels. The main title "subplot-grid-custom · bokeh · pyplots.ai" is centered
+    at the top. The top row contains two panels: (1) "Price Trend Over Time" - a large
+    blue line chart with scatter points showing stock price declining from ~108 to
+    ~80 over 100 days, and (2) "Daily Trading Volume" - yellow vertical bars showing
+    volume fluctuating between 1-5 million. The bottom row has three smaller panels:
+    (1) "Returns Distribution" - a green histogram showing approximately normal distribution
+    of daily returns from -6% to +4%, (2) "Sales by Product Category" - blue vertical
+    bars for Products A-D with varying heights (45, 72, 38, 65 units), and (3) "Variable
+    Correlation Analysis" - coral/red scatter points showing positive correlation.
+    All panels have readable axis labels with units, subtle gray grid lines, and gray
+    outlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text readable, font sizes appropriate for large canvas, slight
+          reduction as some labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'markers and bars well-sized, scatter points visible with good alpha;
+          minor: histogram bins could have slightly more contrast'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: uses blue, yellow, green, coral - distinct and colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: dashboard fills canvas well with varied panel sizes demonstrating
+          custom grid; minor asymmetry in bottom row widths
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive labels with units (Price ($), Volume (millions), Daily
+          Return (%), Units Sold)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: grids subtle at alpha 0.3; no legends needed for these single-series
+          plots but could benefit from subplot outlines being more subtle
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correctly implements custom subplot grid with non-uniform cell sizes
+          (larger main chart, smaller supporting views)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned for all five subplot types
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: demonstrates colspan concept (main time series wider), multiple plot
+          types (line, bar, histogram, scatter), dashboard-style layout
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: no legends shown (not strictly needed but could add series labels)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct "subplot-grid-custom · bokeh · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows 5 different plot types demonstrating grid flexibility; could
+          add a plot spanning multiple rows for full rowspan demonstration
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: excellent financial dashboard scenario with price, volume, returns,
+          sales - cohesive and realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: values realistic (prices ~80-110, volume 1-5M, returns ±6%), product
+          sales could have more realistic scale
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plots → layout → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses Bokeh's row/column layouts and ColumnDataSource; could leverage
+          more Bokeh-specific features like hover tools or linked axes for a dashboard
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/highcharts.yaml b/plots/subplot-grid-custom/metadata/highcharts.yaml
index 6d63b9a09f..ce91920e4b 100644
--- a/plots/subplot-grid-custom/metadata/highcharts.yaml
+++ b/plots/subplot-grid-custom/metadata/highcharts.yaml
@@ -26,3 +26,186 @@ review:
   - Does not use the highcharts-core Python library API (Chart, HighchartsOptions
     classes) as recommended in the library guidelines; instead manually constructs
     JavaScript configurations
+  image_description: "The plot displays a financial dashboard with a custom grid layout\
+    \ containing 5 charts. The main chart (spanning 2 columns on the left) shows \"\
+    Stock Price (90 Days)\" as a blue line chart with markers, displaying price values\
+    \ from ~$123 to $161 over 90 trading days. The Y-axis shows \"Price ($)\" and\
+    \ X-axis shows \"Trading Day\". On the right side, there are 4 smaller detail\
+    \ charts arranged in a 2x2 grid:\n- Top-right 1: \"Trading Volume\" bar chart\
+    \ in yellow showing volume in millions\n- Top-right 2: \"Returns Distribution\"\
+    \ histogram in purple showing daily return percentages  \n- Bottom-right 1: \"\
+    Risk vs Return by Sector\" scatter plot in cyan with labeled data points for Tech,\
+    \ Health, Finance, Energy, and Consumer sectors\n- Bottom-right 2: \"20-Day Moving\
+    \ Average\" line chart in brown showing smoothed price data\n\nThe dashboard title\
+    \ \"subplot-grid-custom · highcharts · pyplots.ai\" appears at the top. All charts\
+    \ have subtle gray gridlines, proper axis labels with units, and legends. The\
+    \ color palette uses blue (#306998), yellow (#FFD43B), purple (#9467BD), cyan\
+    \ (#17BECF), and brown (#8C564B) - a colorblind-safe combination."
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All titles, axis labels, and tick marks are clearly readable at the
+          output size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements anywhere in the dashboard
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line widths, bar sizes, and scatter markers are well-sized; main
+          chart markers could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette with distinct colors for each chart
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Custom grid layout excellently demonstrates colspan/rowspan with
+          main chart spanning 2x2 and 4 detail charts
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Price ($)", "Volume
+          (M)", "Daily Return (%)", "Volatility (%)", "Return (%)", "MA Price ($)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legends are present but very small compared to the chart sizes; legend
+          text appears tiny relative to titles
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements custom subplot grid with mixed chart types (line,
+          column, scatter)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned for all 5 charts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: colspan/rowspan, multiple chart types,
+          dashboard layout'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe the data series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "subplot-grid-custom · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Demonstrates all aspects: time series, volume bars, histogram distribution,
+          scatter with labels, moving average'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Financial dashboard is a realistic use case; data values are plausible
+          but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock prices ($123-$161), volumes in millions, returns in percentages
+          - all realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → configs → HTML → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Highcharts and Selenium APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: While the implementation works, it uses raw JavaScript configuration
+          objects instead of the highcharts-core Python library. The code constructs
+          Highcharts charts directly via JavaScript literals embedded in HTML rather
+          than using the Chart class with HighchartsOptions as demonstrated in the
+          library guidelines.
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/letsplot.yaml b/plots/subplot-grid-custom/metadata/letsplot.yaml
index 97bb9ec974..cc7e532a77 100644
--- a/plots/subplot-grid-custom/metadata/letsplot.yaml
+++ b/plots/subplot-grid-custom/metadata/letsplot.yaml
@@ -28,3 +28,184 @@ review:
   - The -0.15 value in correlation heatmap appears slightly clipped/cramped
   - Library features score limited - did not leverage lets-plot interactive tooltip
     capabilities in HTML output
+  image_description: 'The plot displays a financial dashboard with 5 subplots arranged
+    in a custom grid layout. The main plot (top-left, larger) shows a "Stock Price"
+    line chart with blue line and light blue fill area, price ranging from ~0 to ~100+
+    over 100 trading days. Below it is a "Trading Volume" bar chart with yellow/gold
+    bars showing volume in millions. On the right side, there are three smaller panels:
+    a "Returns" histogram (blue bars showing daily return distribution), a "Sectors"
+    bar chart (with diverging colors - green for positive returns in Tech/Health/Finance
+    and red for negative in Energy), and a "Correlations" heatmap (4x4 matrix with
+    correlation values displayed as text on colored tiles using a diverging blue-red-white
+    scale). The overall title "subplot-grid-custom · lets-plot · pyplots.ai" appears
+    at the top. The layout effectively demonstrates non-uniform cell sizes with the
+    main chart spanning more area than supporting panels.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; titles are bold and clear, axis labels appropriately
+          sized, though some could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and tick marks clearly visible
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line, bars, histogram bins, and heatmap tiles all clearly visible;
+          bar widths well-suited to density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue/gold/green/red scheme with good contrast; diverging scales
+          for sectors and correlations are colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with clear hierarchy; main plot appropriately
+          larger, minor gaps between right panels
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Price ($)", "Volume
+          (M)", "Daily Return (%)", "Return (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grids (alpha 0.3), legends appropriately hidden where color
+          scale is self-explanatory
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements custom subplot grid with multiple plot types
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All data correctly mapped: time series, volume, returns histogram,
+          sector performance, correlations'
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Demonstrates row/column spanning via ggbunch; shows dashboard-style
+          layout with varied cell sizes. Minor: could have shown a cell spanning multiple
+          rows'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color scales accurate where used
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "subplot-grid-custom · lets-plot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 different plot types (line with area, bar, histogram, categorical
+          bar with gradient, heatmap with text); demonstrates diverse subplot capabilities
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial dashboard is a realistic, neutral use case exactly matching
+          the spec's described application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Stock prices, volumes, returns, correlations all have realistic values;
+          seed ensures reproducibility
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → individual plots → ggbunch assembly
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set early
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses noqa comments for wildcard import; minor issue
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggbunch for custom grid positioning which is lets-plot specific;
+          uses scale_fill_gradient2, geom_area, geom_tile with text; could have showcased
+          more unique lets-plot features like tooltips or interactivity
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/matplotlib.yaml b/plots/subplot-grid-custom/metadata/matplotlib.yaml
index 943519d1ba..3eb366a044 100644
--- a/plots/subplot-grid-custom/metadata/matplotlib.yaml
+++ b/plots/subplot-grid-custom/metadata/matplotlib.yaml
@@ -26,3 +26,173 @@ review:
   - Bottom row subplots appear slightly cramped compared to the main panel proportions
   - Library features score could be improved by using shared axes between related
     plots (e.g., dates) or constrained_layout
+  image_description: The plot displays a 6-panel financial dashboard layout using
+    matplotlib's GridSpec. The main plot (spanning 2 columns and 2 rows in top-left)
+    shows a blue price trend line with light blue fill below, featuring dates (Jan-May)
+    on x-axis and price ($70-$110) on y-axis. Top-right shows a correlation scatter
+    plot with yellow markers (blue edges) plotting Variable X vs Variable Y. Middle-right
+    displays a "Performance" bar chart with alternating blue/yellow bars for products
+    A-D with value annotations (85, 72, 93, 67). Bottom-left shows a "Daily Volume"
+    bar chart in blue. Bottom-center shows a "Return Distribution" histogram in yellow
+    with blue edges and a dashed vertical line at 0. Bottom-right contains a "Summary
+    Stats" text box with monospace statistics on a light blue background. The overall
+    title "subplot-grid-custom · matplotlib · pyplots.ai" appears at the top. Color
+    scheme uses Python blue (#306998) and yellow (#FFD43B).
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, subplot titles 18pt+, axis labels 16pt+, all readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, date labels properly formatted by month
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter markers s=150 good for 50 points, line width adequate, bars
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of GridSpec with 2x2 main panel, bottom row slightly cramped
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: "Price ($)", "Volume (M)", "Daily Return (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid has good alpha=0.3, but legend only on main plot; other subplots
+          lack legends where needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct GridSpec-based multi-panel dashboard layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All data correctly mapped to appropriate subplot types
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: colspan/rowspan used (2x2 main), varied cell sizes, dashboard-style
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend on main plot is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "subplot-grid-custom · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows time series, scatter, bar charts, histogram, text stats - diverse
+          coverage
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial/stock dashboard is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Price range ($70-$110), volume (1-5M), returns (±5%) are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300, bbox_inches='tight'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses GridSpec, fill_between, DateFormatter - good matplotlib features
+          but could leverage more advanced features like shared axes
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/plotly.yaml b/plots/subplot-grid-custom/metadata/plotly.yaml
index 1a807c5bac..2652dfa9b9 100644
--- a/plots/subplot-grid-custom/metadata/plotly.yaml
+++ b/plots/subplot-grid-custom/metadata/plotly.yaml
@@ -23,3 +23,178 @@ review:
   - No rowspan demonstration (spec mentions both colspan AND rowspan)
   - Grid lines not visible (could add subtle grid for better readability)
   - Scatter plot markers could be slightly larger for the data density (50 points)
+  image_description: 'The plot displays a custom subplot grid with 5 panels arranged
+    in a 2×3 grid. The main title "subplot-grid-custom · plotly · pyplots.ai" is centered
+    at the top in large font. The top-left panel ("Stock Price (2 Columns)") spans
+    2 columns and shows a blue line chart of stock price over time from January to
+    April 2024, with price declining from ~108 to ~78. The top-right panel ("Category
+    Performance") shows a yellow bar chart with 4 categories (Product A-D) with scores
+    ranging from ~68 to ~91. The bottom row has three equal-width panels: "Trading
+    Volume" (blue bar chart showing volume over time), "Revenue vs Expenses" (blue
+    scatter plot showing positive correlation), and "Daily Returns Distribution" (yellow
+    histogram of returns centered around 0%). All panels use the Python blue (#306998)
+    and yellow (#FFD43B) color scheme. Axis labels include units where appropriate.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text readable, title size 32pt, subplot titles 20pt, tick labels
+          14pt - good but tick labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: line width 3 for main plot, markers size 12 for scatter, bars clearly
+          visible - slightly small markers for scatter
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue and yellow are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent use of canvas, subplot grid well proportioned, main plot
+          appropriately larger
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: all axes have descriptive labels with units (Price ($), Volume (K),
+          Revenue ($K), etc.)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: no grid lines visible (plotly_white template), no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct custom subplot grid with non-uniform cell sizes (colspan=2
+          for main plot)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: has colspan spanning, multiple plot types (line, bar, scatter, histogram),
+          dashboard-style layout
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: no legend needed, subplot titles serve as identifiers
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'correct format: "subplot-grid-custom · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows multiple plot types and span configurations, but no rowspan
+          demonstrated (only colspan)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: financial dashboard scenario with stock price, volume, revenue/expenses,
+          returns - very realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: realistic values overall, though stock dropping from 108 to 78 in
+          4 months is steep
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports used (numpy, pandas, plotly.graph_objects, plotly.subplots)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: using current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses make_subplots with specs for custom grid, graph_objects for
+          fine control, but doesn't leverage interactivity features like hover customization
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/plotnine.yaml b/plots/subplot-grid-custom/metadata/plotnine.yaml
index d22a7bb32d..d0d8d623dd 100644
--- a/plots/subplot-grid-custom/metadata/plotnine.yaml
+++ b/plots/subplot-grid-custom/metadata/plotnine.yaml
@@ -27,3 +27,186 @@ review:
   - The grid layout uses | and / operators which create a 2-column layout, but does
     not demonstrate explicit colspan/rowspan parameters mentioned in spec notes
   - Library features score could be higher with additional ggplot2 grammar features
+  image_description: 'The plot displays a custom subplot grid layout forming an investment
+    portfolio dashboard. The main title "subplot-grid-custom · plotnine · pyplots.ai"
+    appears at the top in bold black text. The layout consists of a large main plot
+    on the left side spanning the full height, showing "Portfolio Value Trend" with
+    a blue line chart displaying portfolio values from ~96,000 to ~103,000 over 60
+    trading days, with data points marked and a yellow linear regression trend line
+    with confidence band. On the right side are three stacked smaller panels: (1)
+    "Asset Allocation" - a vertical bar chart showing Stocks (~55%), Bonds (~25%),
+    Real Estate (~12%), and Cash (~8%) in blue, yellow, light blue, and coral colors;
+    (2) "Daily Trading Volume" - a bar chart showing volume fluctuations over 60 trading
+    days in blue; (3) "Returns Distribution" - a yellow histogram showing daily returns
+    centered around 0% with a roughly normal distribution. All subplots have white
+    backgrounds with subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text readable, main title is excellent, subplot titles and axis
+          labels are clear. Minor: some axis tick labels are slightly small but still
+          readable.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line, bars, and histogram bins are clearly visible. Points on main
+          plot could be slightly larger.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/coral color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Excellent dashboard layout with main plot given prominence. Right
+          column panels are well-proportioned. Minor: slight vertical compression
+          on right panels.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Portfolio Value ($)",
+          "Allocation (%)", "Volume (Units)", "Daily Return (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate. No legend needed for most plots,
+          but asset allocation chart hides legend (acceptable choice).
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct custom grid layout with multiple different plot types (line,
+          bar, histogram)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All data correctly mapped to appropriate axes
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Implements custom grid with non-uniform sizing (main plot spans
+          full height, 3 smaller panels stacked). Uses plotnine composition operators
+          (| and /). Minor: could demonstrate rowspan more explicitly.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No misleading legends
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "subplot-grid-custom · plotnine · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple plot types (line with trend, bar charts, histogram)
+          demonstrating dashboard capability. The portfolio data shows both upward
+          and downward movements.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Investment portfolio dashboard is an excellent real-world scenario
+          for dashboard-style layouts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic portfolio values (~$100k), reasonable allocation percentages,
+          plausible trading volumes and returns
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plots → composition → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine composition operators (| for horizontal, / for
+          vertical stacking), ggplot grammar, stat_smooth for trend line. Could leverage
+          more ggplot2-specific features like faceting alternatives.
+  verdict: APPROVED
diff --git a/plots/subplot-grid-custom/metadata/seaborn.yaml b/plots/subplot-grid-custom/metadata/seaborn.yaml
index 088a8670ee..7910727c40 100644
--- a/plots/subplot-grid-custom/metadata/seaborn.yaml
+++ b/plots/subplot-grid-custom/metadata/seaborn.yaml
@@ -25,3 +25,181 @@ review:
   - Missing rowspan example - spec mentions cells spanning multiple rows (e.g., tall
     sidebar plot) but all cells only span columns
   - Heatmap y-axis label text appears concatenated/overlapping due to tight spacing
+  image_description: 'The plot shows a dashboard-style layout with 6 subplots arranged
+    in a custom grid. The main plot (top-left, spanning 2x2 cells) displays a "Daily
+    Price Trend" line chart with a blue line (#306998) and light blue fill beneath,
+    showing price ranging from ~100 to ~80 over time from Dec 29 to Apr 27. Top-right
+    shows a yellow scatter plot (#FFD43B) titled "Correlation Analysis" with Variable
+    X vs Y. Middle-right displays a "Quarterly Performance" boxplot with Q1-Q4 in
+    Set2 colors (teal, coral, olive, pink). Bottom row contains three smaller plots:
+    "Monthly Volume" bar chart with viridis palette (Jan-Dec), "Returns Distribution"
+    histogram with KDE curve and yellow zero-reference line, and a "Correlation Matrix"
+    heatmap in RdBu_r diverging colormap showing Price/Volume/Returns correlations.
+    The main title "subplot-grid-custom · seaborn · pyplots.ai" appears at the top
+    in bold.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable, main title 24pt, subplot titles 16-20pt, labels
+          14-16pt, ticks 10-12pt. Slightly smaller than optimal but still clear.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text. Month labels rotated 45° to avoid overlap.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Elements well-sized. Scatter markers s=120 appropriate for 80 points.
+          Line width 3 is good. Minor: boxplot could be slightly larger.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses viridis, Set2, RdBu_r - all colorblind-safe palettes.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of GridSpec with main plot spanning 2x2. Minor: heatmap
+          and scatter feel slightly cramped.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Price ($)", "Volume (Units)", "Daily
+          Returns (%)", "Performance Score".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid alpha=0.3 is good. No legends needed (colors self-explanatory).
+          Heatmap colorbar slightly small.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements custom subplot grid with GridSpec.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all subplots.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colspan (main plot 2 cols), but no rowspan example shown (spec
+          mentions tall sidebar plot).
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Heatmap annotations accurate, boxplot categories clear.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "subplot-grid-custom · seaborn · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 6 different plot types (line, scatter, boxplot, bar, histogram,
+          heatmap). Good variety but could show rowspan feature.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Financial dashboard scenario: price trend, volume, returns, correlations.
+          Very realistic.'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Values realistic: price ~80-110, returns ±5%, volumes 1000-5000,
+          correlations 0.4-1.0.'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save. No functions/classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: matplotlib.dates, gridspec, plt, np, pd, sns.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses `hue` parameter correctly for seaborn 0.14+, but `legend=False`
+          pattern is verbose.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: 'Good use of seaborn functions: lineplot, scatterplot, boxplot, barplot,
+          histplot with kde, heatmap. Uses axes-level API correctly. Could leverage
+          FacetGrid or jointplot for more distinctive seaborn features.'
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/altair.yaml b/plots/subplot-grid/metadata/altair.yaml
index db598dba6e..9914228a17 100644
--- a/plots/subplot-grid/metadata/altair.yaml
+++ b/plots/subplot-grid/metadata/altair.yaml
@@ -26,3 +26,191 @@ review:
   - Canvas utilization has some extra whitespace on the right side
   - Could leverage more Altair-specific features like interactive selections or layered
     annotations
+  image_description: "The plot displays a well-organized 2×2 grid of four distinct\
+    \ visualizations with a centered main title \"subplot-grid · altair · pyplots.ai\"\
+    \ at the top. \n\n**Top-left (Scatter)**: \"Product Performance\" showing Units\
+    \ Sold (x-axis, 0-1000) vs Revenue ($K) (y-axis, 0-100) with ~50 blue circles\
+    \ of consistent size and good transparency.\n\n**Top-right (Line)**: \"Monthly\
+    \ Sales Trend\" displaying two lines—solid blue for actual Sales and dashed yellow\
+    \ for Target—spanning February to December. A legend labeled \"Metric\" distinguishes\
+    \ the two series. The y-axis shows Sales ($K) ranging from ~500 to 1,150.\n\n\
+    **Bottom-left (Bar)**: \"Regional Performance\" with 5 vertical bars for Central,\
+    \ East, North, South, and West regions. Bars are conditionally colored—blue for\
+    \ scores ≥80 (East, North) and yellow for <80 (Central, South, West). Y-axis shows\
+    \ Performance Score (0-100).\n\n**Bottom-right (Histogram)**: \"Order Value Distribution\"\
+    \ showing a right-skewed lognormal distribution of order values ($20-$420) with\
+    \ frequency on the y-axis (0-50). Blue bars with good visibility.\n\nColor scheme\
+    \ uses Python's signature blue (#306998) and yellow (#FFD43B). All text is clearly\
+    \ readable with appropriate font sizes. Grid lines are subtle."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; subplot titles ~22pt, axis labels ~16-18pt,
+          tick labels ~16pt. Slightly smaller than ideal for 4800×2700 but still good.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere; x-axis labels are horizontal and well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter markers (size=120) are appropriately sized for 50 points.
+          Line thickness (3) is good. Bar chart readable. Histogram bars could be
+          slightly wider.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow combination is colorblind-safe; good contrast against
+          white background.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good 2×2 layout with consistent spacing (60px). Some extra whitespace
+          on the right side of the canvas.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Revenue ($K)", "Sales ($K)", "Performance
+          Score", "Order Value ($)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid opacity at 0.3 is subtle. Legend for line chart is well-placed
+          but could be more prominent.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a subplot grid with 4 different plot types (scatter,
+          line, bar, histogram).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y mappings are correct for all four subplots.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: configurable grid (2×2), different plot
+          types per cell, clear titles, consistent spacing.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges without clipping.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend in line chart accurately shows Sales vs Target.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "subplot-grid · altair · pyplots.ai" but uses
+          regular dot instead of middle dot (·).
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: scatter for correlation, line for time series
+          with dual metrics, bar for categorical comparison with conditional coloring,
+          histogram for distribution.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Financial dashboard theme is realistic and neutral: product metrics,
+          monthly sales vs targets, regional performance, order value distribution.'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (revenue 10-100K, performance scores 0-100,
+          order values $20-420). Sales starting at 500K and reaching 1150K is plausible
+          but the cumulative growth pattern could be more realistic.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → charts → combine → save.
+          No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) at the start.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: altair, numpy, pandas.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs are current.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html which is correct for Altair.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative grammar with hconcat/vconcat for grid layout,
+          condition for color encoding, and scale/domain customization. Could leverage
+          more interactive features or layering.
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/bokeh.yaml b/plots/subplot-grid/metadata/bokeh.yaml
index 57693cf32c..afe263bc2a 100644
--- a/plots/subplot-grid/metadata/bokeh.yaml
+++ b/plots/subplot-grid/metadata/bokeh.yaml
@@ -25,3 +25,178 @@ review:
     true grid title) causing slight visual asymmetry
   - Individual subplot dimensions (2400x1350 each) result in a total grid size that
     may not match the target 4800x2700 exactly
+  image_description: 'The plot displays a 2x2 grid financial dashboard with four distinct
+    visualizations: (1) Top-left shows a blue line chart with data points tracking
+    stock price from ~$105 down to ~$85 over 60 trading days; (2) Top-right displays
+    yellow/gold vertical bars representing daily trading volume in millions; (3) Bottom-left
+    presents a color-coded scatter plot (green for high performers >8%, red for low
+    <5%, blue for mid-range) showing risk vs return analysis; (4) Bottom-right shows
+    a blue histogram of daily returns with a roughly normal distribution centered
+    near 0%. The main title "subplot-grid · bokeh · pyplots.ai" appears above the
+    top-left subplot.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Text is readable but titles at 24pt are slightly smaller than the
+          recommended 28pt for bokeh at this resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Elements are well-sized; scatter markers are appropriately sized
+          for 40 points, histogram bins are clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Good color choices: Python blue, yellow, green/red/blue for categorical
+          distinction'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Grid layout is well-balanced, but individual subplots could use more
+          of their canvas area
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units (Price ($), Volume (Millions),
+          Risk (Volatility %), etc.)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid lines are subtle with dashed style and 0.3 alpha; no legend
+          needed for this layout
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a 2x2 subplot grid with different plot types
+          (line, bar, scatter, histogram)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned for all four subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: configurable grid, different plot types
+          per cell, clear titles'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed; color coding is self-explanatory in context)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "subplot-grid · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple subplot types (line, bar, scatter, histogram) demonstrating
+          grid versatility; could include one more diverse type
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial dashboard scenario is realistic and cohesive (price, volume,
+          risk/return, returns distribution)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic; stock price ~$85-110, volume in millions, returns
+          in percentages
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses gridplot, ColumnDataSource, and both PNG/HTML export; could
+          leverage more interactive features like tooltips
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/highcharts.yaml b/plots/subplot-grid/metadata/highcharts.yaml
index 7d70ff7b27..3a86a3eda6 100644
--- a/plots/subplot-grid/metadata/highcharts.yaml
+++ b/plots/subplot-grid/metadata/highcharts.yaml
@@ -27,3 +27,183 @@ review:
   - LineSeries imported from .area module instead of .line module (minor code issue)
   - Could benefit from synchronized tooltips across charts for better interactivity
   - Legend disabled on most charts where it could provide useful context
+  image_description: The plot displays a 2x2 grid layout of financial dashboard visualizations.
+    Top-left shows a "Stock Price Trend" with a solid blue line tracking price from
+    ~$102 down to ~$77 over 60 trading days, accompanied by a yellow dashed 10-Day
+    Moving Average. Top-right displays "Trading Volume" as blue vertical bars ranging
+    from ~10k to ~130k shares. Bottom-left shows "Daily Returns Distribution" as a
+    yellow histogram with frequency counts (0-8) across return percentages. Bottom-right
+    presents "Price vs Volume Relationship" as a purple scatter plot. The main title
+    "subplot-grid · highcharts · pyplots.ai" is centered at the top. All subplots
+    have light gray borders and consistent styling.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; subplot titles are bold and clear, axis labels
+          visible, tick labels legible. Slightly smaller than ideal for some axis
+          labels.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements in any subplot
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line thickness appropriate, bars clearly visible, scatter markers
+          good size (radius=10). Histogram bars could be slightly more distinct.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent colorblind-safe palette: blue (#306998), yellow (#FFD43B),
+          purple (#9467BD). No red-green conflicts.'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good 2x2 grid layout with consistent spacing. Minor: bottom-left
+          histogram x-axis labels are cut off/not visible.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Price ($)", "Volume (shares)", "Return
+          (%)", "Frequency", "Trading Day"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid lines (alpha appropriate), legend well-placed in price
+          chart. However, most subplots disable legend when it could be useful.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: shows 4 different plot types (line, column, histogram,
+          scatter) in grid layout'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned for all subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: configurable grid (2x2), different plot
+          types per cell, clear subplot titles, consistent spacing'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correct ("Price", "10-Day MA")
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "subplot-grid · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows diverse chart types demonstrating subplot grid capabilities.
+          Each subplot shows different visualization approach. Minor: could show shared
+          axes example.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: financial dashboard with stock price,
+          volume, returns distribution, and price-volume relationship'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Good realistic values. Stock price ~$77-110, volume 10k-130k shares,
+          returns -4% to +4%. Random walk behavior is realistic.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → chart creation → HTML generation
+          → screenshot'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using LineSeries from highcharts_core.options.series.area (should
+          be from .line)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts grid container system, multiple chart instances,
+          dash_style for MA line. Could leverage more Highcharts-specific features
+          like synchronized tooltips across charts.
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/letsplot.yaml b/plots/subplot-grid/metadata/letsplot.yaml
index b170851cb4..ca981a1cbd 100644
--- a/plots/subplot-grid/metadata/letsplot.yaml
+++ b/plots/subplot-grid/metadata/letsplot.yaml
@@ -27,3 +27,178 @@ review:
   - Does not leverage lets-plot distinctive interactive features (tooltips, coordinated
     brushing) even in static output
   - Scatter points in bottom-right plot could be slightly larger for better visibility
+  image_description: |-
+    The plot displays a 2x2 grid of financial dashboard visualizations:
+    - **Top-left**: Stock price line chart (blue solid line) with 10-day moving average (yellow dashed line), showing prices from ~$85-$110 over 60 trading days
+    - **Top-right**: Daily trading volume bar chart (blue bars) showing volume in millions, ranging from ~0.5-2.2M
+    - **Bottom-left**: Histogram of daily returns (blue bars with darker border) centered around 0%, with a yellow dashed vertical line at zero
+    - **Bottom-right**: Scatter plot with linear regression showing volume vs absolute return relationship, blue points with yellow trend line and gray confidence band
+    - Overall title: "subplot-grid · lets-plot · pyplots.ai" at top
+    - Each subplot has its own descriptive title, and all use consistent blue (#306998) and yellow (#FFD43B) color scheme
+    - Clean minimal theme with subtle gray gridlines
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: main title large, subplot titles and
+          axis labels well-sized'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the grid
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line weights, bar widths, and scatter points are appropriate; scatter
+          points could be slightly larger for optimal visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: 2x2 grid is well-proportioned, plots fill canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units ("Price ($)", "Volume
+          (Millions)", "Daily Return (%)", etc.)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend for the price plot's two lines (solid vs dashed) which
+          would help identify moving average vs actual price
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct subplot grid with 4 different plot types (line, bar, histogram,
+          scatter)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple plot types, configurable grid (2x2), clear titles per subplot,
+          cohesive dashboard
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: While missing legend for price plot lines, the title indicates what
+          they represent
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "subplot-grid · lets-plot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows diverse plot types (line with overlay, bars, histogram, scatter
+          with regression), demonstrates grid flexibility
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial dashboard theme with stock price, volume, returns histogram,
+          and volume-price relationship is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock prices (~$85-110), volume in millions, percentage returns are
+          all realistic financial values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plots → grid → save structure, no functions
+          or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct scale for 4800x2700
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses basic gggrid which is standard; could leverage lets-plot specific
+          features like tooltips, interactive elements, or coordinated views
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/matplotlib.yaml b/plots/subplot-grid/metadata/matplotlib.yaml
index 5a7c80d84c..2da9f56cb5 100644
--- a/plots/subplot-grid/metadata/matplotlib.yaml
+++ b/plots/subplot-grid/metadata/matplotlib.yaml
@@ -24,3 +24,182 @@ review:
   - Font sizes slightly below recommended guidelines (titles 18pt vs 24pt, labels
     16pt vs 20pt)
   - Legend fontsize could be larger for better readability (12-14 vs 16)
+  image_description: "The plot displays a 2x2 grid of financial dashboard-style visualizations\
+    \ with a main title \"subplot-grid · matplotlib · pyplots.ai\" at the top. \n\
+    - **Top-left**: Line chart showing stock price over 100 trading days (blue solid\
+    \ line) with a 20-day moving average (yellow dashed line). Price ranges from ~80\
+    \ to ~110 dollars.\n- **Top-right**: Bar chart showing trading volume in thousands,\
+    \ with blue bars for positive return days and red/coral bars for negative return\
+    \ days.\n- **Bottom-left**: Histogram showing return distribution from -6% to\
+    \ +4%, with a yellow vertical line at zero and a red dashed line at mean (-0.16%).\
+    \ The distribution appears roughly normal.\n- **Bottom-right**: Scatter plot showing\
+    \ Volume vs Absolute Return magnitude, with points colored by return value using\
+    \ an RdYlGn colormap (red=negative, green=positive), plus a colorbar.\nColors\
+    \ used: Python blue (#306998), yellow (#FFD43B), red (#D94A4A), RdYlGn colormap.\
+    \ All text is clearly readable with appropriate sizing."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; titles 18pt (should be 24pt per guidelines), labels
+          16pt, ticks 14pt - slightly smaller than ideal but still clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line widths good (2-2.5), scatter markers s=80 appropriate for 99
+          points, bars clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/red scheme is distinguishable; RdYlGn colormap works
+          well
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of 2x2 grid, proper spacing with tight_layout
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Price ($)", "Volume
+          (thousands)", "Daily Return (%)", "Absolute Return (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid alpha=0.3 is good, but legends are slightly small (fontsize
+          12-14 vs recommended 16)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements 2x2 subplot grid with different plot types (line,
+          bar, histogram, scatter)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly mapped in all subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: configurable grid (2x2), independent
+          axes, different plot types per cell, consistent spacing, clear subplot titles'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legends are accurate (Price, 20-day MA, Zero Return, Mean)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "subplot-grid · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple plot types (line, bar, histogram, scatter), moving
+          average, color encoding by sign, colorbar - excellent variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial dashboard is a perfect, neutral real-world use case for
+          subplot grids
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Stock prices ~$80-110, returns -6% to +4%, volumes in reasonable
+          range - all realistic, though starting price dropping immediately from 100
+          is slightly artificial
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plt.subplots, colorbar, axes methods - but could showcase
+          more advanced features like gridspec, shared axes, or inset_axes
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/plotly.yaml b/plots/subplot-grid/metadata/plotly.yaml
index 2e567ef8b0..d6a4acbe0e 100644
--- a/plots/subplot-grid/metadata/plotly.yaml
+++ b/plots/subplot-grid/metadata/plotly.yaml
@@ -26,3 +26,182 @@ review:
     and showgrid to axes)
   - Scatter markers in bottom-right subplot could be slightly larger (12-14 instead
     of 10) for better visibility
+  image_description: 'The plot displays a 2x2 grid of financial dashboard visualizations
+    with a clean white background. **Top-left**: A line chart showing stock price
+    (blue solid line ~$80-110 range) with a 20-day moving average (yellow dashed line)
+    over dates from Jan 7 to Feb 25, 2024. **Top-right**: A bar chart showing trading
+    volume (green/red bars for positive/negative returns, range 0-3M). **Bottom-left**:
+    A histogram of daily returns distribution (blue bars, roughly normal distribution
+    from -4% to +4%). **Bottom-right**: A scatter plot showing price vs volume relationship
+    (blue dots). The main title "subplot-grid · plotly · pyplots.ai" appears centered
+    at the top. A legend is positioned on the right side showing all series. All subplot
+    titles are clearly visible and axis labels include units where appropriate.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable; title at 32pt, subplot titles at 20pt,
+          axis labels at 18pt, tick labels at 14pt - slightly below optimal for 4800px
+          width
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line width of 3 is good, scatter markers at size 10 are visible but
+          could be slightly larger for the 60 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue/yellow for price/MA (easily distinguishable), green/red
+          for volume direction with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent 2x2 grid layout with proper spacing (0.1 horizontal, 0.12
+          vertical), plot fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Price ($)", "Volume",
+          "Daily Return (%)", "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No grid lines visible in the plots (plotly_white template), legend
+          positioned well but could benefit from subtle grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a 2x2 subplot grid with 4 different plot types
+          (line, bar, histogram, scatter)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has configurable grid (2x2), different plot types per cell, clear
+          subplot titles, consistent spacing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "subplot-grid · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Demonstrates all subplot grid features: 4 different plot types,
+          related but distinct visualizations, shared theme'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Financial dashboard is a realistic use case, but stock starting at
+          exactly $100 and data period of exactly 60 days feels slightly synthetic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Realistic values: stock price $80-110, volume 0.5M-3M, returns -4%
+          to +4%'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, plotly.graph_objects, make_subplots)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to 'plot.png' but also saves 'plot.html' (extra file, but not
+          incorrect)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses make_subplots with specs for different chart types, plotly_white
+          template, and HTML export for interactivity. Could have leveraged more Plotly-specific
+          features like hover customization or shared axes between subplots.
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/plotnine.yaml b/plots/subplot-grid/metadata/plotnine.yaml
index 9b00500fb5..f0de576990 100644
--- a/plots/subplot-grid/metadata/plotnine.yaml
+++ b/plots/subplot-grid/metadata/plotnine.yaml
@@ -23,3 +23,179 @@ review:
   - Main title positioning overlaps slightly with Sales Trend subplot title - could
     benefit from more vertical spacing
   - The legend in the sales trend plot consumes plot area; consider positioning outside
+  image_description: |-
+    The plot displays a well-organized 2x2 grid layout showcasing a product performance dashboard:
+    - **Top-left (Sales Trend)**: Line chart with two product series (A in blue, B in yellow) showing sales over 40 days. Points are connected by solid lines with dashed linear trend lines. Legend on the right shows "Product" with A and B entries.
+    - **Top-right (Quarterly Revenue)**: Vertical bar chart with Q1-Q4 quarters. Bars gradient from dark blue (Q1=45) through lighter blues (Q2, Q3) to yellow (Q4=18).
+    - **Bottom-left (Sales Distribution Product A)**: Histogram with ~8 blue bars showing frequency distribution of sales units, ranging roughly 85-130 units.
+    - **Bottom-right (Units vs Margin)**: Scatter plot with blue points showing relationship between units sold (100-500) and profit margin (20-45%). Yellow regression line with light yellow confidence band.
+    The main title "subplot-grid · plotnine · pyplots.ai" appears prominently in bold at the top center. All text is legible with good contrast against white backgrounds.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable; subplot titles are bold, axis labels
+          and tick marks are appropriately sized for the output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; legend placement in top-left subplot
+          is well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Data elements are visible; scatter points and histogram bars are
+          sized appropriately, though line chart points could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-friendly (not red-green dependent)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with 2x2 grid; main title occasionally overlaps
+          with "Sales Trend" subplot title slightly
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Sales (Units)", "Revenue
+          (k$)", "Profit Margin (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid present; legend in sales trend plot is clear but takes
+          up space in plot area
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements subplot grid with 4 distinct plot types (line,
+          bar, histogram, scatter)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y variables correctly assigned in all subplots
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Dashboard-style visualization with multiple metrics, different plot
+          types per cell, consistent styling
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify Product A and B
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Main title uses correct format but positioned over subplot content
+          rather than above it
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows line chart with trends, categorical bars, distribution histogram,
+          and scatter with regression - demonstrates grid versatility well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product performance dashboard is a realistic business scenario with
+          sales, revenue, and profit metrics
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic; sales 80-140 units, revenue 18-45k, margin
+          20-45% are plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean script structure: imports → data → plots → composition → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used; no unnecessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses stat_smooth which works but is older API pattern
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Excellent use of plotnine's composition operators (| for columns,
+          / for rows), ggplot2 grammar of graphics syntax, stat_smooth for regression,
+          scale_color_manual/scale_fill_manual
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/pygal.yaml b/plots/subplot-grid/metadata/pygal.yaml
index dd1ef297fa..8119d88b7a 100644
--- a/plots/subplot-grid/metadata/pygal.yaml
+++ b/plots/subplot-grid/metadata/pygal.yaml
@@ -31,3 +31,193 @@ review:
     density level
   - Code structure uses loops for chart rendering which adds slight complexity beyond
     pure KISS style
+  image_description: "The plot displays a 2x2 grid of four distinct charts with a\
+    \ main title \"subplot-grid · pygal · pyplots.ai\" centered at the top in dark\
+    \ gray text. \n\n**Top-left**: Line chart titled \"Monthly Revenue ($K)\" showing\
+    \ an upward trend from January (~120K) to December (~248K). Blue line with visible\
+    \ data point markers, gray gridlines, x-axis labeled \"Month\" and y-axis labeled\
+    \ \"Revenue ($K)\".\n\n**Top-right**: Bar chart titled \"Sales by Category ($K)\"\
+    \ with five golden/yellow vertical bars representing Electronics (~45K), Apparel\
+    \ (~33K), Home (~28K), Sports (~20K), and Books (~15K). X-axis labeled \"Category\"\
+    , y-axis labeled \"Sales ($K)\".\n\n**Bottom-left**: Scatter plot titled \"Ad\
+    \ Spend vs Return on Investment\" showing ~30 blue dots distributed across the\
+    \ chart. X-axis labeled \"Ad Spend ($K)\" ranging 0-50, y-axis labeled \"ROI (%)\"\
+    \ ranging 0-6. Shows positive correlation trend.\n\n**Bottom-right**: Histogram\
+    \ titled \"Daily Order Volume Distribution\" with golden/yellow bars showing a\
+    \ roughly normal distribution of daily orders. X-axis labeled \"Orders per Day\"\
+    \ (60-280 range), y-axis labeled \"Frequency\" (0-70 range). Peak frequency around\
+    \ 150-160 orders.\n\nOverall: Clean white background, light gray plot backgrounds\
+    \ (#fafafa), consistent styling across all charts, well-balanced layout with equal\
+    \ cell sizes."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; titles, axis labels, and tick marks are clear
+          at full size. Slightly smaller than ideal for publication quality.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels are spaced well, all
+          tick labels readable.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and bars are well-sized; scatter plot dots are visible but
+          could be slightly larger for 30 points.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe and provide
+          good contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good 2x2 grid layout with balanced margins; slight excess whitespace
+          at edges.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units (e.g., "Revenue ($K)",
+          "ROI (%)", "Orders per Day").
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legends needed (single series
+          per chart), but some charts could benefit from guide lines.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct subplot grid with four distinct chart types (line, bar, scatter,
+          histogram).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all four subplots.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Configurable 2x2 grid, different plot types per cell, clear titles
+          for each subplot, consistent spacing.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges without clipping.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legends needed for single-series charts; appropriate decision.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "subplot-grid · pygal · pyplots.ai" but rendered
+          via PIL rather than native pygal (acceptable workaround).
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows four different chart types demonstrating grid versatility;
+          histogram shows distribution, scatter shows correlation, line shows trend,
+          bar shows comparison. Missing: shared axes example.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent business dashboard scenario with realistic metrics: revenue
+          trends, category sales, ad ROI, and order distribution.'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for business context; revenue in hundreds of
+          thousands, ROI percentages reasonable.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Linear flow but uses helper functions/loops for chart creation; slightly
+          more complex than ideal KISS structure.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (pygal, numpy, cairosvg, PIL).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as "plot.png" but also creates "plot.html"; should use explicit
+          path.
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of pygal''s distinctive features: SVG rendering with
+          cairosvg conversion, Style customization, multiple chart types (Line, Bar,
+          XY, Histogram), interactive HTML output with embedded SVG.'
+  verdict: APPROVED
diff --git a/plots/subplot-grid/metadata/seaborn.yaml b/plots/subplot-grid/metadata/seaborn.yaml
index f9bf9e3d45..a9b3c370ec 100644
--- a/plots/subplot-grid/metadata/seaborn.yaml
+++ b/plots/subplot-grid/metadata/seaborn.yaml
@@ -28,3 +28,183 @@ review:
     readability
   - Volume bar chart uses matplotlib ax.bar instead of seaborn barplot - could leverage
     more seaborn-native functions
+  image_description: 'The plot displays a 2x2 grid financial dashboard with the main
+    title "subplot-grid · seaborn · pyplots.ai" at the top. **Top-left**: A line chart
+    showing "Stock Price" over time from January to May 2024, with price ranging from
+    ~$85 to ~$110, rendered in blue (#306998). The x-axis shows dates with some rotation,
+    y-axis labeled "Price ($)". **Top-right**: A bar chart showing "Trading Volume"
+    with alternating blue and yellow bars (representing positive/negative return days),
+    x-axis showing "Trading Day" (0-100), y-axis "Volume (Millions)" (0-10). **Bottom-left**:
+    A histogram showing "Daily Returns Distribution" with blue bars, a yellow dashed
+    vertical line at zero labeled "Zero Return", x-axis "Daily Return (%)" ranging
+    from -4 to 4, y-axis "Frequency" (0-11). **Bottom-right**: A scatter plot showing
+    "Price vs Volume" with points colored using a RdBu (red-blue) diverging colormap
+    based on daily return, point sizes varying by volume, with a colorbar indicating
+    "Daily Return (%)" from -4 to +2. All subplots have subtle grid lines and clear
+    axis labels with units.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; subplot titles 20pt, axis labels 16pt, ticks 12pt
+          - slightly below recommended 24/20/16 but still clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; date labels have rotation applied
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line, bars, histogram bins, and scatter points all clearly visible;
+          scatter sizes 50-300 appropriate for 100 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow scheme is colorblind-safe; RdBu palette for scatter is
+          accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good 2x2 grid utilization; slight crowding in date labels but well-balanced
+          overall
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes have descriptive labels with units: "Price ($)", "Volume
+          (Millions)", "Daily Return (%)", "Frequency"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha=0.3; legend in histogram subplot placed well;
+          scatter colorbar works but could be better integrated
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct subplot grid with 4 distinct plot types: line, bar, histogram,
+          scatter'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All variables correctly assigned to appropriate axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid dimensions configurable, each cell has different plot type,
+          consistent spacing, clear subplot titles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes limits
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Histogram legend correct; scatter legend disabled but colorbar provided
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "subplot-grid · seaborn · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple subplot types, different scales, varied visualization
+          approaches; could show shared axes example
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Financial dashboard is a perfect real-world scenario with plausible
+          stock data
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Stock prices ~$85-110 realistic, returns -5% to +4% realistic, volumes
+          in millions appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: matplotlib, numpy, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's lineplot, histplot, scatterplot with hue/size encoding;
+          volume bars use matplotlib ax.bar instead of seaborn barplot
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/altair.yaml b/plots/subplot-mosaic/metadata/altair.yaml
index 3e6c21dcf5..14e753db66 100644
--- a/plots/subplot-mosaic/metadata/altair.yaml
+++ b/plots/subplot-mosaic/metadata/altair.yaml
@@ -25,3 +25,182 @@ review:
   - Scatter plot lacks legend for the color encoding (efficiency gradient)
   - HTML version could benefit from interactive features (tooltips, selection) which
     Altair excels at
+  image_description: "The plot displays a dashboard-style mosaic layout with 6 panels\
+    \ on a white background. The main title \"subplot-mosaic · altair · pyplots.ai\"\
+    \ is displayed prominently at the top in large black text. \n\n**Top row:** A\
+    \ wide \"Monthly Revenue Overview\" line chart in blue (#306998) showing revenue\
+    \ trend from January to April, and a semi-circular gauge showing \"Performance\
+    \ Score\" of 78 in blue.\n\n**Middle row:** A \"Sales by Region\" bar chart with\
+    \ 5 blue bars (Central, East, North, South, West), and a large \"Efficiency vs\
+    \ Output\" scatter plot with circles varying in size and color (blue color scale),\
+    \ spanning two rows on the right side.\n\n**Bottom row:** A small yellow \"By\
+    \ Category\" bar chart (Type A, B, C) and a \"Daily Traffic Pattern\" area chart\
+    \ in blue showing hourly traffic.\n\nAll text is legible, axis labels include\
+    \ units where appropriate (Revenue $K, Sales $K, Efficiency %, Output units),\
+    \ and the layout demonstrates the mosaic concept well with varying panel sizes."
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text readable, title is large, axis labels clear. Minor: some
+          smaller charts have slightly smaller fonts than optimal'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Elements well-sized. Scatter plot markers appropriately sized with
+          opacity. Minor: some smaller chart elements could be slightly larger'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue color scheme consistently, yellow accent for one panel
+          - colorblind safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good mosaic layout demonstrating varying sizes, balanced composition.
+          Minor: slight variation in spacing'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Revenue ($K)", "Sales ($K)", "Efficiency
+          (%)", "Output (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Clean appearance, no distracting grids. Minor: no legend for scatter
+          color encoding'
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct mosaic/subplot layout with multiple chart types
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all subplots
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Shows varying sizes, different plot types (line, bar, scatter, area,
+          gauge). Minor: no explicit empty cell demonstration with "."'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Subplot titles accurately describe content
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "subplot-mosaic · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows 6 different chart types demonstrating mosaic flexibility.
+          Minor: could show one panel with empty cell'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Real business dashboard scenario: revenue, sales regions, efficiency
+          metrics, traffic patterns'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Realistic values: revenue 30-50K, sales 29-52K, efficiency 60-95%,
+          traffic patterns'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → charts → concatenation → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Minor: uses expr calculation in mark_arc theta2 which could be simplified'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's hconcat/vconcat for layout, layering for gauge chart,
+          proper encoding types. Could leverage more Altair-specific features like
+          selection or interactivity for HTML version
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/bokeh.yaml b/plots/subplot-mosaic/metadata/bokeh.yaml
index 0745d858d1..2bd330cb97 100644
--- a/plots/subplot-mosaic/metadata/bokeh.yaml
+++ b/plots/subplot-mosaic/metadata/bokeh.yaml
@@ -31,3 +31,173 @@ review:
     larger for better readability
   - Could demonstrate Bokeh interactive features (hover tools, linked brushing) for
     richer dashboard experience
+  image_description: |-
+    The plot displays a business dashboard mosaic layout with 6 subplots of varying sizes:
+    1. **Top-left (large, ~2/3 width)**: "Quarterly Revenue Overview" - Blue line chart with markers showing daily revenue ($50k-$62k) over 90 days
+    2. **Top-right (~1/3 width)**: "Product Profitability" - Scatter plot with blue (Premium Line) and yellow (Standard Line) dots showing profit margin vs units sold
+    3. **Middle-left (~half width)**: "Sales by Category" - Blue vertical bar chart for Electronics, Clothing, Food, Books
+    4. **Middle-right top (small)**: "Conversion Rate (%)" - Line chart with blue and yellow lines tracking weekly conversion rates
+    5. **Middle-right bottom (small)**: "Customer Satisfaction" - Yellow bar chart showing quarterly scores (Q1-Q4: 78-88)
+    6. **Bottom (full width)**: Main title "subplot-mosaic · bokeh · pyplots.ai" - Dual line chart comparing 2023 (blue) vs 2024 (green) monthly orders
+
+    Colors: Python Blue (#306998), Python Yellow (#FFD43B), Accent Green (#4CAF50). Legends present on most subplots. Dashed subtle grids. Clean professional appearance.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 7
+        max: 10
+        passed: true
+        comment: Titles and major labels readable; some smaller subplot labels could
+          be larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Good marker/line sizing for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette (blue/yellow/green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Excellent mosaic demonstration; good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Labels include units: Revenue ($), Profit Margin (%), Sales ($),
+          Score'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grids (alpha 0.3), well-placed legends
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct mosaic layout with varying subplot sizes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned across all subplots
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Multiple plot types (line, scatter, bar), different sizes, cell spanning.
+          Empty cell gap feature partially implemented.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Accurate legends with meaningful labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "subplot-mosaic · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: time series, scatter, bars, comparisons, trends'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business dashboard scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic values (revenue $50-60k, margins 5-45%, satisfaction 78-88%)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has apply_theme() helper function; defines unused data sources (source_e1,
+          source_e2)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: Theme is imported but only used to define an unused theme variable;
+          apply_theme uses direct attribute assignment instead
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of row/column layouts, ColumnDataSource, Legend/LegendItem,
+          Spacer; could add linked axes or hover tools
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/highcharts.yaml b/plots/subplot-mosaic/metadata/highcharts.yaml
index 08a134eab5..fe418c45aa 100644
--- a/plots/subplot-mosaic/metadata/highcharts.yaml
+++ b/plots/subplot-mosaic/metadata/highcharts.yaml
@@ -24,3 +24,179 @@ review:
   - Legends in Panel A and C are quite small and could be larger for better visibility
   - Could use Highcharts built-in dashboard/synchronized chart features for more native
     implementation
+  image_description: The plot displays a mosaic/dashboard layout with three panels.
+    The main title "subplot-mosaic · highcharts · pyplots.ai" appears at the top center.
+    Panel A (large, spanning upper-left 2x2) shows a line chart of "Monthly Sales
+    Trend" with months Jan-Dec on the x-axis and Sales (Units) 38-88 on the y-axis,
+    using a blue line with circular markers. Panel B (narrow, upper-right) shows a
+    horizontal bar chart "Regional Performance" with four regions (North, South, East,
+    West) in colorblind-safe colors (blue, yellow, purple, cyan) with values 245,
+    312, 189, 276 respectively. Panel C (wide, bottom spanning full width) displays
+    a scatter plot "Product Price vs Revenue Analysis" with Product Price ($) 10-98
+    on x-axis and Revenue ($) 0-18k on y-axis, using blue semi-transparent dots. All
+    panels have light gray backgrounds with subtle shadows, creating a cohesive dashboard
+    appearance. Grid lines are subtle gray.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels readable, though some tick labels on scatter
+          x-axis are slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Scatter markers well-sized with good alpha, line chart markers visible,
+          bar chart clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, cyan)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent mosaic layout AAB/AAB/CCC, good proportions, panels fill
+          canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Sales (Units)", "Product Price ($)",
+          "Revenue ($)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legends present but "Sales" legend in Panel A is very small; "Products"
+          legend in Panel C barely visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct mosaic subplot layout with varying sizes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all three panels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple chart types (line, bar, scatter), asymmetric layout, visual
+          hierarchy
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show full data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "subplot-mosaic · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple chart types and layout variations; scatter shows good
+          variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Plausible business dashboard scenario (sales, regional performance,
+          product analysis)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reasonable values; sales 42-85 units, regional 189-312, product price
+          $10-100
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses strict=False in zip which is fine but unusual
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts' native chart types and styling; CSS Grid for layout
+          is clever but not Highcharts-specific
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/letsplot.yaml b/plots/subplot-mosaic/metadata/letsplot.yaml
index 2e89c3e760..9f33587827 100644
--- a/plots/subplot-mosaic/metadata/letsplot.yaml
+++ b/plots/subplot-mosaic/metadata/letsplot.yaml
@@ -26,3 +26,187 @@ review:
     figure
   - Does not demonstrate empty cell capability (using "." placeholder) mentioned in
     spec notes
+  image_description: 'The plot displays a complex 6-panel mosaic layout with varying
+    subplot sizes. At the top, a full-width area chart shows "Daily Revenue Overview"
+    with a blue line (#306998) and semi-transparent blue fill over 100 days. The middle
+    row has a bar chart "Sales by Product" spanning 2/3 width showing 5 product categories
+    in yellow bars with blue borders, and a scatter plot "Effort vs Output" in the
+    remaining 1/3 with blue points and a yellow regression line. The bottom row contains
+    three equal-width panels: a blue histogram "Score Distribution" showing a bimodal
+    distribution, a yellow line chart "Monthly Growth" with blue points, and a heatmap
+    "Regional Performance" using a yellow-to-blue gradient with white text values.
+    The title "subplot-mosaic · letsplot · pyplots.ai" appears at the top left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All text is readable; subplot titles are bold and clear, axis labels
+          and tick marks are appropriately sized. Minor: some axis text could be slightly
+          larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; bar chart x-axis labels are angled at 45° to
+          avoid collision.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: All elements are clearly visible; scatter points, bars, histogram
+          bins, and heatmap tiles are well-sized. The scatter points could be slightly
+          larger for the data density.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Consistent blue/yellow color scheme is colorblind-safe and provides
+          good contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good mosaic arrangement demonstrating the spec's purpose. The layout
+          effectively shows varying sizes with clear visual hierarchy.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Revenue ($)", "Effort (hours)",
+          "Output (units)", "Growth Rate (%)".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 'Subtle grids present on most panels. The heatmap legend is appropriately
+          hidden. Minor: grid styling could be more consistent across panels.'
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements mosaic subplot layout with varying panel sizes.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y mappings are correct across all 6 subplot types.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Demonstrates mosaic pattern (AAA/BBC/DEF), multiple plot types (area,
+          bar, scatter, histogram, line, heatmap), and varying sizes. Minor: no demonstration
+          of empty cells/gaps mentioned in spec.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes display complete data ranges appropriately.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legends are accurate where present; appropriately hidden on heatmap.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title "subplot-mosaic · letsplot · pyplots.ai" is present but appears
+          as ggbunch title rather than prominently centered.
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 6 different plot types with various mosaic sizes (full-width,
+          2/3 width, 1/3 width). The bimodal histogram and scatter with regression
+          demonstrate data variation.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business dashboard scenario with revenue, sales, effort/output correlation,
+          score distribution, growth metrics, and regional performance is coherent
+          and professional.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All values are realistic: revenue ~1000-1500$, sales 290-520 units,
+          growth rates 2-5%, performance scores 60-100.'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plots → combine → save structure.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at beginning.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (os, numpy, pandas, lets_plot).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using `size` parameter in some geoms; lets-plot prefers `stroke`
+          for outline widths.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Effectively uses ggbunch() for mosaic layout with explicit regions,
+          theme_minimal(), geom_smooth with method="lm", scale_fill_gradient for heatmap.
+          Good use of lets-plot grammar of graphics.
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/matplotlib.yaml b/plots/subplot-mosaic/metadata/matplotlib.yaml
index 18fa033c8e..f927cb3b28 100644
--- a/plots/subplot-mosaic/metadata/matplotlib.yaml
+++ b/plots/subplot-mosaic/metadata/matplotlib.yaml
@@ -26,3 +26,175 @@ review:
     visible for 80 points)
   - Missing demonstration of empty cells with . placeholder mentioned in spec notes
   - Metrics panel (E) layout could be tighter - the box has significant internal whitespace
+  image_description: 'The plot displays a mosaic subplot layout with 5 panels arranged
+    in the pattern "AAB;AAB;CDE". The main panel (A) shows a cumulative sales time
+    series with a blue line and light blue fill area, spanning the left 2/3 of the
+    upper portion. Panel B on the right shows a horizontal bar chart with yellow bars
+    (outlined in blue) for 4 products with value labels. The bottom row has three
+    smaller panels: C shows a scatter plot with blue points, D displays a histogram
+    with blue bars, and E contains a "Key Metrics" panel with text values. The main
+    title "subplot-mosaic · matplotlib · pyplots.ai" appears at the top. Colors are
+    primarily blue (#306998) and yellow (#FFD43B) with white backgrounds and subtle
+    gray grids.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; main titles and labels are appropriately sized,
+          though some bottom panel labels are slightly smaller
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Most elements well-sized; scatter points could be slightly larger
+          for the 80-point dataset
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow palette is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout following mosaic pattern; slight imbalance with metrics
+          panel having more whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units ("Cumulative Sales ($)", "Units Sold")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grids are subtle (alpha 0.3), but no legends needed for this visualization
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements mosaic subplot layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Data appropriately mapped in each subplot
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows varying sizes, multiple plot types, but doesn't demonstrate
+          empty cells with "." placeholder
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legends present (not strictly required but would help)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "subplot-mosaic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Demonstrates time series, bar chart, scatter, histogram, and metrics
+          panel - excellent variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales dashboard scenario is realistic and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values are realistic (sales ~500-650, units 85-120, percentages
+          for metrics)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Excellent use of plt.subplot_mosaic() which is matplotlib's signature
+          feature for this task, but could also use additional matplotlib-specific
+          features like annotations or color normalization
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/plotly.yaml b/plots/subplot-mosaic/metadata/plotly.yaml
index e4b0b55c79..bd1fd4b9a5 100644
--- a/plots/subplot-mosaic/metadata/plotly.yaml
+++ b/plots/subplot-mosaic/metadata/plotly.yaml
@@ -29,3 +29,174 @@ review:
     spec
   - Bottom row metric panels feel slightly cramped; row_heights could be adjusted
     for better balance
+  image_description: |-
+    The plot displays a dashboard-style mosaic layout with 7 subplots arranged in a 3-row structure. The title "subplot-mosaic · plotly · pyplots.ai" appears at the top center.
+
+    **Row 1:** A wide "Revenue Trend (Overview)" line chart with blue fill spans 2 columns on the left, showing an upward trend from ~50k to ~62k over Jan-Apr 2024. A narrower "Monthly Sales" bar chart (yellow bars) occupies the right column, displaying Jan-Jun sales data.
+
+    **Row 2:** A "Product Performance" scatter plot with blue markers spans 2 columns showing Feature X vs Feature Y distribution (80 points). A "Category Distribution" horizontal bar chart with multi-colored bars (blue, yellow, gray, brown) shows Electronics, Clothing, Food, Books, Sports categories on the right.
+
+    **Row 3:** Three smaller metric panels - "Efficiency" (blue area chart, 0-100%), "Quality Score" (yellow area chart, 0-70 Score), and "Response Time" (blue line chart, 0-60 ms), each showing 30-day trends.
+
+    Colors are consistent (blue #306998, yellow #FFD43B). All text is legible. Layout shows clear visual hierarchy with larger panels for primary data.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable, fonts appropriately scaled for 4800x2700 canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and lines visible, scatter markers slightly small for data
+          density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas but bottom row panels feel slightly cramped
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units (Revenue ($), Sales ($), %, ms)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend disabled (appropriate for this layout), grid via plotly_white
+          template
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct mosaic subplot layout with varying sizes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned across all subplots
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has column spanning (AAB/CCD pattern), multiple plot types; no empty
+          cell demo with "." placeholder
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend disabled but subplot titles serve as labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "subplot-mosaic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows line, bar, scatter, horizontal bar, area charts; missing gap/empty
+          cell demo
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business dashboard scenario (revenue, sales, categories, metrics)
+          is realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values mostly appropriate; metric panels could show more meaningful
+          Y-axis ranges
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only used imports (numpy, pandas, plotly)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses make_subplots with specs/colspan, row_heights, graph_objects;
+          could leverage more interactive features or annotations
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/plotnine.yaml b/plots/subplot-mosaic/metadata/plotnine.yaml
index a46cfc4890..5781808624 100644
--- a/plots/subplot-mosaic/metadata/plotnine.yaml
+++ b/plots/subplot-mosaic/metadata/plotnine.yaml
@@ -28,3 +28,177 @@ review:
     theme system
   - Could demonstrate the mosaic empty cell feature mentioned in the spec using placeholder
     characters
+  image_description: 'The plot displays a well-organized mosaic dashboard layout with
+    5 distinct panels. The top row features a large line+point chart (left ~2/3 width)
+    showing sales trends over 60 days for three products (Alpha in dark blue, Beta
+    in yellow, Gamma in light blue), alongside a smaller bar chart (right ~1/3) showing
+    quarterly revenue with Q4 highlighted in yellow. The bottom row contains three
+    equally-sized panels: a scatter plot of Units Sold vs Margin (%), a heatmap showing
+    regional performance across Sales/Profit/Growth metrics with numeric values displayed
+    in each cell, and a bar chart of monthly scores (Jan-Jun). The main title "subplot-mosaic
+    · plotnine · pyplots.ai" is centered at the top in bold. Colors are consistent
+    throughout with a blue (#306998) and yellow (#FFD43B) theme.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title is large and bold, axis labels clear,
+          though some tick labels on the heatmap are slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Elements well-sized; scatter points visible with good alpha; line
+          plot markers appropriately sized for data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-friendly, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good mosaic layout with varying panel sizes; slight imbalance as
+          heatmap legend is at the very bottom separated from its panel
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with units where appropriate ("Sales (Units)",
+          "Revenue (k$)", "Margin (%)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grids subtle, product legend well placed; heatmap colorbar legend
+          at bottom is functional but isolated from the panel
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements mosaic subplot layout with varying sizes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned across all panels
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows different plot types (line, bar, scatter, heatmap), varying
+          panel sizes; could demonstrate empty cell placeholder feature
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Has correct format "subplot-mosaic · plotnine · pyplots.ai" but uses
+          regular dot instead of middle dot (·) in some places
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple plot types, varying panel sizes; demonstrates visual
+          hierarchy with larger overview panel
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Product performance dashboard is a plausible business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for business metrics; some margin values go
+          slightly negative which could be more realistic
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Effectively uses plotnine's composition operators (| for horizontal,
+          / for vertical layout), ggplot grammar with multiple geoms, custom theming
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/pygal.yaml b/plots/subplot-mosaic/metadata/pygal.yaml
index 8dd4522308..e9c2fd8f98 100644
--- a/plots/subplot-mosaic/metadata/pygal.yaml
+++ b/plots/subplot-mosaic/metadata/pygal.yaml
@@ -24,3 +24,162 @@ review:
   - Uses helper functions (render_chart_to_image, get_svg_content) which violates
     KISS principle for plot implementations
   - Does not demonstrate empty cell placeholder feature mentioned in spec
+  image_description: |-
+    The plot displays a mosaic dashboard layout with 5 panels arranged in an "AAB;AAC;DDE" pattern:
+    - **Panel A (top-left, 2×2)**: Line chart showing "Monthly Revenue vs Costs" with blue revenue line and yellow costs line over 12 months (Jan-Dec). Both lines show upward trends with revenue ranging ~120-218 and costs ~85-135. Dots mark data points, legend at bottom.
+    - **Panel B (top-right)**: Horizontal bar chart "Sales by Category" showing 5 categories (Electronics: 450, Clothing: 320, Home: 280, Sports: 195, Books: 165) with values printed in bars using pyplots color palette.
+    - **Panel C (middle-right)**: Donut chart "Regional Share" showing North (blue, ~35%), South (yellow), East (green), West (orange) with legend at bottom.
+    - **Panel D (bottom-left, 2×1)**: XY scatter plot "Marketing ROI Correlation" with Marketing Spend ($K) on x-axis and Sales ($K) on y-axis, showing positive correlation with ~40 blue dots.
+    - **Panel E (bottom-right)**: Half-pie solid gauge "Target Achievement" showing 78% progress with blue fill.
+    - Main title "subplot-mosaic · pygal · pyplots.ai" centered at top in dark gray. White background with light gray plot backgrounds.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Most text readable, but some labels on smaller panels could be larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: All elements visible, scatter dots appropriately sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses pyplots palette, colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent mosaic layout, panels fill space well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive with units (Amount $K, Marketing Spend $K, Sales $K)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle, but pie chart legend could be better positioned
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct mosaic subplot layout with varying sizes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all panels
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: All major features present, but no empty cell demonstration (spec
+          mentions "." for gaps)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legends correct but placement varies
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "subplot-mosaic · pygal · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 different chart types (line, bar, pie, scatter, gauge), demonstrating
+          mosaic flexibility
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Sales/business dashboard is plausible and neutral
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Revenue, costs, and percentages are realistic
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper function `render_chart_to_image()` and `get_svg_content()`,
+          violates KISS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/subplot-mosaic/metadata/seaborn.yaml b/plots/subplot-mosaic/metadata/seaborn.yaml
index 457513ada7..f0c1d14e34 100644
--- a/plots/subplot-mosaic/metadata/seaborn.yaml
+++ b/plots/subplot-mosaic/metadata/seaborn.yaml
@@ -29,3 +29,182 @@ review:
     or statistical annotations
   - Feature coverage could include additional plot types to better demonstrate mosaic
     flexibility
+  image_description: |-
+    The plot displays a 7-panel mosaic dashboard layout with the title "subplot-mosaic · seaborn · pyplots.ai" at the top. The layout follows an "AAB;CCD;EFG" pattern:
+    - **Panel A (top-left, spanning 2 columns)**: A blue line chart showing "Revenue Trend Overview" from late December 2023 to early April 2024, with revenue ranging from ~50,000 to ~65,000 dollars.
+    - **Panel B (top-right)**: A yellow scatter plot titled "Marketing ROI" showing Marketing Spend ($K) vs Conversions with blue-edged circular markers.
+    - **Panel C (middle-left, spanning 2 columns)**: A bar chart "Sales by Channel" showing four bars (Online, Retail, Partner, Direct) in blue and yellow colors.
+    - **Panel D (middle-right)**: A histogram "Response Times" showing a bimodal distribution of response times in milliseconds.
+    - **Panel E (bottom-left)**: A small blue line chart "CPU Usage" showing hourly CPU percentage.
+    - **Panel F (bottom-center)**: A small yellow line chart "Memory Usage" showing hourly memory percentage.
+    - **Panel G (bottom-right)**: A box plot "Latency" showing latency distribution across four regions (North, South, East, West) with different colored boxes.
+    All panels have subtle gray grid lines, clear axis labels with units, and appropriate font sizes.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title is 22pt (slightly below 24pt ideal),
+          panel titles 14-18pt are good, tick labels appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, clean separation between all elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers and lines are well-sized for their data density; scatter
+          markers (s=100) appropriate for 50 points; box plots visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) palette is colorblind-friendly;
+          some panels could benefit from more color distinction
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent mosaic layout with proper height ratios (1.2, 1, 0.8),
+          panels fill space well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units (Revenue ($), Sales ($K),
+          Response Time (ms), CPU (%), etc.)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grids are subtle (alpha=0.3, dashed), no unnecessary legends cluttering
+          the view
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements mosaic subplot layout with varying sizes using
+          matplotlib's subplot_mosaic
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X/Y correctly assigned in all 7 panels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Multiple plot types (line, scatter, bar, histogram, box), asymmetric
+          layout, clear visual hierarchy
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All axes show complete data ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legends appropriately hidden where not needed (hue used for coloring
+          only)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "subplot-mosaic · seaborn · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 6 different plot types demonstrating mosaic flexibility; could
+          include one more type like a heatmap or pie
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Business dashboard scenario is realistic and neutral (revenue, marketing,
+          sales channels, system metrics)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Most values are realistic; revenue ~50-65K, CPU 0-50%, response times
+          25-175ms are plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter for coloring
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png' but no explicit dpi=300 at 16x9 figsize produces
+          4800x2700px only if matplotlib defaults work
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's lineplot, scatterplot, barplot, histplot, and boxplot
+          effectively; however, doesn't leverage seaborn-specific features like regplot,
+          kdeplot, or FacetGrid that would distinguish from pure matplotlib
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/altair.yaml b/plots/sudoku-basic/metadata/altair.yaml
index c02f75e90a..a47c45682b 100644
--- a/plots/sudoku-basic/metadata/altair.yaml
+++ b/plots/sudoku-basic/metadata/altair.yaml
@@ -21,4 +21,170 @@ review:
   - Numbers are large, bold, and well-centered within cells
   - Code follows KISS principles with clear data preparation and chart composition
   - Properly saves both PNG and HTML outputs
-  weaknesses: []
+  weaknesses:
+  - The axis labels criterion (VQ-06) doesn't apply to this spec type, but this is
+    handled appropriately
+  image_description: The plot displays a 9×9 Sudoku grid with the title "sudoku-basic
+    · altair · pyplots.ai" at the top. The grid has a clean black-and-white design
+    with thick black lines (approximately 4px) separating the nine 3×3 boxes and thinner
+    lines (1px) dividing individual cells. Numbers are displayed in bold black text,
+    centered within their respective cells. The puzzle shows a partially filled Sudoku
+    with 30 given numbers distributed across the grid. Empty cells are displayed as
+    blank spaces. The layout is square with excellent canvas utilization - the grid
+    fills most of the available space.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Numbers are large (48pt), bold, and perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; numbers are well-centered in cells
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines clearly visible with proper thick/thin hierarchy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Monochrome black-on-white design, perfect for printing
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 900×900 grid with excellent proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sudoku grid (no axes needed, no deduction appropriate)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid lines are clear and appropriately styled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 9×9 Sudoku grid visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers correctly positioned in cells
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thick/thin line hierarchy, centered numbers, blank empty cells
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 9 rows and columns visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sudoku (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "sudoku-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows a valid Sudoku puzzle with good number distribution, though
+          could demonstrate more varied placement patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, valid Sudoku puzzle (the "World's Hardest Sudoku" puzzle)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standard 9×9 grid with values 1-9
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → build → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded puzzle)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Altair's layering, mark_rule for grid lines, mark_text
+          for numbers, and declarative encoding
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/bokeh.yaml b/plots/sudoku-basic/metadata/bokeh.yaml
index d54e61fbc5..00e0297c2d 100644
--- a/plots/sudoku-basic/metadata/bokeh.yaml
+++ b/plots/sudoku-basic/metadata/bokeh.yaml
@@ -24,3 +24,164 @@ review:
   - Feature coverage could show more variety in number distribution across the puzzle
   - Library features usage is basic - could leverage ColumnDataSource for more idiomatic
     Bokeh code
+  image_description: The plot displays a standard 9×9 Sudoku grid with a clean black-and-white
+    design. The title "sudoku-basic · bokeh · pyplots.ai" appears at the top in bold
+    text. The grid uses thick black lines to separate the nine 3×3 boxes and thin
+    black lines for individual cell boundaries. Numbers (1-9) are displayed in bold
+    black font, centered within their cells. Empty cells are blank (no zeros shown).
+    The overall layout is square with balanced proportions, and the grid fills a good
+    portion of the canvas. The monochrome design is suitable for printing.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt and numbers at 60pt are perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all numbers clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines and numbers optimally sized for the canvas
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Black-and-white design is universally accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 3600×3600 canvas well-utilized, grid centered
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sudoku grid (no axes needed, not penalized)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid lines well-differentiated (thin vs thick)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 9×9 Sudoku grid visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers correctly placed in cells, row 0 at top
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thick box boundaries, thin cell lines, centered numbers, blank empties
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 9×9 grid displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sudoku (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "sudoku-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows valid Sudoku puzzle with good number distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, solvable Sudoku puzzle pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Numbers 1-9 as expected, partially filled puzzle
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear code: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded grid)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Minor: could use `output_file` placement'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Bokeh Label model for text, export_png, and HTML output
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/highcharts.yaml b/plots/sudoku-basic/metadata/highcharts.yaml
index c6e3b5c71d..a0bcefa62a 100644
--- a/plots/sudoku-basic/metadata/highcharts.yaml
+++ b/plots/sudoku-basic/metadata/highcharts.yaml
@@ -27,3 +27,166 @@ review:
     when printed'
   - Grid lines are rendered via HTML divs rather than native Highcharts features,
     which is less elegant
+  image_description: The plot displays a 9×9 Sudoku grid with the title "sudoku-basic
+    · highcharts · pyplots.ai" at the top in black text. The grid features a clean
+    black-and-white design with thick black lines separating the nine 3×3 boxes and
+    thin gray lines dividing individual cells. Numbers (1-9) are displayed in bold
+    black font, centered within their respective cells. The puzzle shows a partially
+    filled Sudoku with numbers like 5,3 in the top-left box, 7 in the top-center area,
+    6,1,9,5 in the second row, etc. Empty cells are shown as blank white spaces. The
+    overall layout is square with good proportions and the grid fills most of the
+    canvas area appropriately.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title and all numbers are clearly readable with appropriate font
+          sizes (56px title, 72px numbers)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, numbers are well-centered in cells
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines and numbers are perfectly visible with good contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Monochrome design (black/white/gray) is fully accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Grid fills canvas well, minor issue with small red bar at bottom
+          edge
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sudoku grid (no axes needed), full points
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed, but the thin gray lines could be slightly more
+          visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 9×9 Sudoku grid visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers correctly positioned in cells, zeros shown as empty
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thick 3×3 box lines, thin cell lines,
+          centered numbers, empty cells blank'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 9×9 grid displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type, full points
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sudoku-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows a valid Sudoku puzzle with mixed filled/empty cells, but could
+          show more variety in number distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, solvable Sudoku puzzle pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Numbers 1-9 as expected, valid Sudoku constraints
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart setup → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (fixed puzzle), no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also creates intermediate plot_temp.png
+          (cleaned up)
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/letsplot.yaml b/plots/sudoku-basic/metadata/letsplot.yaml
index 67a0e50b6a..c902ad31d8 100644
--- a/plots/sudoku-basic/metadata/letsplot.yaml
+++ b/plots/sudoku-basic/metadata/letsplot.yaml
@@ -24,3 +24,168 @@ review:
   weaknesses:
   - Pandas import is unnecessary; simple Python lists/dicts would suffice and reduce
     dependencies
+  image_description: The plot displays a 9×9 Sudoku grid with a clean black-and-white
+    design. The title "sudoku-basic · letsplot · pyplots.ai" appears at the top in
+    bold black text. The grid shows thick black lines separating the nine 3×3 boxes,
+    with thinner gray lines dividing individual cells within each box. Numbers 1-9
+    are displayed in bold black font, centered in their respective cells. Empty cells
+    are shown as blank (no zeros displayed). The puzzle contains a partially filled
+    standard Sudoku with numbers like 5,3 in the top-left, 7 in the top-middle area,
+    etc. The overall layout is square (1:1 aspect ratio) with the grid well-centered
+    and utilizing good canvas space. The monochrome design is clean and suitable for
+    printing.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and readable, numbers are large and clear with bold
+          font
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, each number is cleanly centered in its cell
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines are perfectly visible, thick vs thin distinction is clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Black and white design is perfectly accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format with good margins, grid fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sudoku grid (no axes needed, but the criterion requires labels)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid lines are well-designed with clear visual hierarchy
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 9×9 Sudoku grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers correctly placed in grid cells
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thick lines for 3×3 boxes, thin lines for cells, empty cells blank
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 9 rows and columns visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for Sudoku)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: sudoku-basic · letsplot · pyplots.ai'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows valid Sudoku puzzle with mix of filled and empty cells
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, recognizable Sudoku puzzle pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standard 9×9 grid with values 1-9
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: false
+        comment: Data is hardcoded (deterministic), no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: true
+        comment: Imports pandas but could use simpler dict/list approach
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses lets-plot grammar of graphics
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_tile, geom_segment, geom_text, theme_void, coord_fixed
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/matplotlib.yaml b/plots/sudoku-basic/metadata/matplotlib.yaml
index 3e33e52e5b..5d2ead0477 100644
--- a/plots/sudoku-basic/metadata/matplotlib.yaml
+++ b/plots/sudoku-basic/metadata/matplotlib.yaml
@@ -24,3 +24,174 @@ review:
   - Could use a slightly larger canvas margin at the bottom to perfectly balance the
     title padding at top
   - No use of advanced matplotlib features like style contexts or rcParams
+  image_description: 'The plot displays a clean, well-structured 9×9 Sudoku grid in
+    a square format. The grid uses a crisp black-and-white monochrome design with
+    clear visual hierarchy: thick black lines (approximately 5px) separate the nine
+    3×3 boxes, while thinner lines (approximately 1.5px) divide individual cells within
+    each box. Numbers 1-9 are displayed in large, bold black font centered within
+    their cells. Empty cells are left blank (zeros not displayed). The title "sudoku-basic
+    · matplotlib · pyplots.ai" appears at the top in bold black text. The grid fills
+    the canvas well with balanced white margins around all sides. The overall appearance
+    is clean, professional, and optimized for printing.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Numbers are large (fontsize=42), bold, and perfectly readable. Title
+          at fontsize=32 is clear.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; all numbers cleanly centered in cells
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines appropriately sized with clear thick/thin distinction
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Pure black-and-white design, excellent contrast, fully accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square format appropriate for symmetric grid; good canvas utilization,
+          slight margin asymmetry
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sudoku grid (axes hidden intentionally), full points as this
+          is correct behavior
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid/legend needed, but no points awarded since the criterion
+          expects these elements when applicable
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: 9×9 Sudoku grid as specified'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid data correctly positioned; 0 values shown as blank
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thick 3×3 box boundaries, thin cell lines,
+          centered numbers, blank empty cells, monochrome design'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 9×9 grid displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sudoku-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows a realistic partially-filled Sudoku puzzle with good distribution
+          of clues
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a valid, solvable Sudoku puzzle configuration
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Standard 9×9 grid with values 1-9, exactly as expected
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no randomness involved)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as 'plot.png'; the output comment says 3600x3600, which at
+          dpi=300 is 12×12 inches and is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib patches, text positioning, and line drawing effectively,
+          but no advanced features like rcParams customization or style sheets
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/plotly.yaml b/plots/sudoku-basic/metadata/plotly.yaml
index 8db8eadf06..29023befe3 100644
--- a/plots/sudoku-basic/metadata/plotly.yaml
+++ b/plots/sudoku-basic/metadata/plotly.yaml
@@ -26,3 +26,175 @@ review:
   - Could add subtle hover tooltips showing cell coordinates to leverage Plotly interactivity
   - Thin line width (1.5) could be slightly thicker (2.0) for better print visibility
     at smaller sizes
+  image_description: The plot displays a 9×9 Sudoku puzzle grid on a clean white background.
+    The grid uses a clear visual hierarchy with thick black lines (width 5) separating
+    the nine 3×3 box regions, and thinner black lines (width 1.5) dividing individual
+    cells. Numbers (1-9) are displayed in large black Arial Black font, centered within
+    their cells. Empty cells are properly blank with no zeros shown. The title "sudoku-basic
+    · plotly · pyplots.ai" appears centered at the top in black text. The puzzle shows
+    a classic partially-filled Sudoku with numbers distributed across all regions.
+    The overall design is monochrome (black and white only), clean, and suitable for
+    printing.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Numbers are large (font size 48), bold, and perfectly readable. Title
+          is clear at size 36.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; each number is perfectly centered in its
+          cell.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines are appropriately sized with clear thick/thin distinction.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Pure black on white, perfect contrast, no color-based information.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid fills canvas well with balanced margins, good use of square
+          format.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sudoku (no axes needed, but no points awarded).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid lines serve as the visualization itself; no legend needed.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: 9×9 Sudoku grid with proper box divisions.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers correctly placed in cells, row/column positioning accurate.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thick box lines, thin cell lines, centered
+          numbers, empty cells blank.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 9×9 grid displayed with proper boundaries.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Sudoku (no legend needed).
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sudoku-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows a valid Sudoku puzzle with varied number placement across all
+          boxes. Could benefit from showing more numbers to demonstrate fuller coverage.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, solvable Sudoku puzzle (the classic "Wikipedia Sudoku").
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Numbers 1-9 are appropriate; slightly sparse distribution (30 given
+          numbers).
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → figure → traces → layout → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Fixed data (no random elements), deterministic output.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only `plotly.graph_objects` imported and used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but uses 1200×1200 base (3600×3600 with scale=3)
+          instead of standard sizes.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Plotly's annotation system and Scatter traces for custom drawing.
+          Also generates HTML for interactivity. Could leverage more Plotly-specific
+          features like hover information for educational purposes.
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/plotnine.yaml b/plots/sudoku-basic/metadata/plotnine.yaml
index 8ebb5791fd..340cf98a4d 100644
--- a/plots/sudoku-basic/metadata/plotnine.yaml
+++ b/plots/sudoku-basic/metadata/plotnine.yaml
@@ -24,3 +24,166 @@ review:
   - Could use slightly lighter gray for thin lines to increase contrast with thick
     lines
   - The figure_size could be larger for even better resolution at 300 DPI
+  image_description: The plot displays a 9×9 Sudoku grid on a clean white background.
+    The title "sudoku-basic · plotnine · pyplots.ai" appears at the top in bold black
+    text. Thick black lines (approximately 2pt weight) clearly delineate the nine
+    3×3 box regions at positions 0, 3, 6, and 9. Thinner gray lines separate individual
+    cells within each box. Numbers (5, 3, 7, 6, 1, 9, 5, 9, 8, 6, 8, 6, 3, 4, 8, 3,
+    1, 7, 2, 6, 6, 2, 8, 4, 1, 9, 5, 8, 7, 9) are displayed in bold black font, centered
+    within their respective cells. Empty cells (zeros in the data) are rendered as
+    blank spaces. The grid uses a square 1:1 aspect ratio with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, numbers are large (size=28) and perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines and numbers are optimally sized for the visualization
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Monochrome black/white/gray design is fully accessible
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format is perfect for Sudoku, grid fills canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sudoku (no axes required) - not deducted but not applicable
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid IS the visualization, no legend needed - not deducted
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 9×9 Sudoku grid visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid positions correctly mapped, row 0 at top
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thick lines for 3×3 boxes, thin lines for cells, empty cells blank
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 9×9 grid displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed for Sudoku)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sudoku-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows valid partially-filled Sudoku with varied number distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, solvable Sudoku puzzle pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Numbers 1-9 are correct; could have more starting numbers for variety
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → dataframes → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded puzzle)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as "plot.png"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar (geom_tile, geom_segment, geom_text, theme_void,
+          coord_fixed), but nothing especially advanced
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/pygal.yaml b/plots/sudoku-basic/metadata/pygal.yaml
index 774fbfdc27..f7bf6d6df2 100644
--- a/plots/sudoku-basic/metadata/pygal.yaml
+++ b/plots/sudoku-basic/metadata/pygal.yaml
@@ -24,3 +24,159 @@ review:
   - Code uses a custom class (SudokuGrid) instead of simple procedural code, violating
     the KISS principle for plot implementations
   - sys.path manipulation is unnecessary complexity that could be avoided
+  image_description: The plot displays a 9×9 Sudoku grid with the title "sudoku-basic
+    · pygal · pyplots.ai" at the top. The grid features thick black lines separating
+    the nine 3×3 box regions, with thinner gray lines dividing individual cells within
+    each box. Numbers 1-9 are displayed in a blue color (#306998) in their starting
+    positions, centered within their respective cells. Empty cells are left blank.
+    The background is white, creating a clean monochrome design. The grid is well-centered
+    and fills a good portion of the square canvas. The numbers are bold and clearly
+    legible with appropriate font sizing.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All numbers perfectly readable with appropriate font size, title
+          clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, numbers well-centered in cells
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines and numbers sized optimally for the content
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue numbers on white background with black/gray grid lines - excellent
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid well-centered with appropriate margins, fills ~70% of canvas
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 9×9 Sudoku grid with proper structure
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers correctly placed in grid positions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Thick lines for 3×3 boxes, thin lines for cells, centered numbers,
+          empty cells blank
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 81 cells properly displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, appropriately omitted for Sudoku
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct "sudoku-basic · pygal · pyplots.ai" format
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows classic Sudoku puzzle with mixed difficulty areas (dense and
+          sparse regions)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Standard valid Sudoku puzzle, recognizable format
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers 1-9 as expected, appropriate starting cell count (~30 given
+          numbers)
+    code_quality:
+      score: 5
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses custom class (SudokuGrid) which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic puzzle data, no random generation
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Custom class approach adds complexity
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Outputs plot.png, plot.svg, and plot.html correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Custom pygal Graph subclass demonstrates advanced SVG capabilities,
+          renders to multiple formats (SVG, PNG, HTML)
+  verdict: APPROVED
diff --git a/plots/sudoku-basic/metadata/seaborn.yaml b/plots/sudoku-basic/metadata/seaborn.yaml
index 0a50adafe8..71cf6695cd 100644
--- a/plots/sudoku-basic/metadata/seaborn.yaml
+++ b/plots/sudoku-basic/metadata/seaborn.yaml
@@ -24,3 +24,178 @@ review:
   weaknesses:
   - Minimal use of seaborn distinctive features - sns.heatmap is only used to create
     a blank white grid, with all meaningful visualization done via matplotlib
+  image_description: The plot displays a standard 9×9 Sudoku grid with clean visual
+    hierarchy. The background is white with light gray thin lines separating individual
+    cells. Thick black lines clearly separate the nine 3×3 box regions. Numbers are
+    displayed in a blue color (#306998), bold, and centered within their cells. Empty
+    cells are shown as blank (no zeros displayed). The title "sudoku-basic · seaborn
+    · pyplots.ai" appears at the top in the same blue color. The grid is perfectly
+    square with proper proportions, and all numbers are clearly readable.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All numbers are large (fontsize=32), bold, and perfectly readable.
+          Title is clear at fontsize=28.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements; each number is centered in its cell with
+          ample spacing.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid lines are appropriately sized - thick (4px) for 3×3 boxes, thin
+          for cells. Numbers are highly visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) on white is excellent contrast and colorblind-safe.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square 12×12 figure with grid filling the canvas well, balanced margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Sudoku grid - no axis labels needed, but criteria expects
+          them.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid lines are subtle gray (#CCCCCC) for cells, bold black for boxes.
+          No legend needed.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: standard 9×9 Sudoku grid with proper visual hierarchy.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid data correctly mapped to cell positions; 0s displayed as empty
+          cells.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: thick lines for 3×3 boxes, thin lines
+          for cells, numbers centered, monochrome-friendly design.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 81 cells shown within proper bounds.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A - no legend needed for Sudoku grid.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "sudoku-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows a valid partially-filled Sudoku puzzle with good distribution
+          of starting numbers (30 clues), but doesn't demonstrate edge cases like
+          very sparse or dense puzzles.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Uses a real, solvable Sudoku puzzle configuration - this is a well-known
+          puzzle pattern.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 1-9 are appropriate for Sudoku; standard 9×9 grid.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → style → plot → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded puzzle), no random elements.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports are used: matplotlib.patches, plt, numpy, seaborn.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn and matplotlib APIs.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with proper dpi and settings.
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses sns.heatmap as a base, but only for creating a blank grid structure.
+          The actual visualization (numbers, thick lines) is done entirely with matplotlib.
+          Seaborn is used minimally - just for set_style, set_context, and an all-white
+          heatmap. This doesn't leverage seaborn's distinctive statistical visualization
+          capabilities.
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/altair.yaml b/plots/sunburst-basic/metadata/altair.yaml
index 877000872a..133097eb91 100644
--- a/plots/sunburst-basic/metadata/altair.yaml
+++ b/plots/sunburst-basic/metadata/altair.yaml
@@ -24,3 +24,172 @@ review:
     labels (rely on hover tooltips)
   - Legend positioned far from chart, creating visual imbalance
   - Could benefit from more interactive features like click-to-zoom that Altair supports
+  image_description: 'The plot displays a sunburst chart with three concentric rings
+    representing a hierarchical company budget breakdown. The innermost ring shows
+    4 departments (Engineering, Operations, Marketing, Sales) with white bold text
+    labels positioned at the center of each arc. The middle ring displays teams within
+    each department using lighter shades of the parent department color. The outermost
+    ring shows projects with the lightest color shades. Engineering (Python Blue #306998)
+    occupies the largest segment, followed by Sales (Coral), Marketing (Yellow), and
+    Operations (Teal). White stroke lines clearly separate all segments. A legend
+    is positioned on the right side listing the four departments. The title "sunburst-basic
+    · altair · pyplots.ai" appears at the top center.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Department labels readable but slightly small for outer rings; inner
+          labels excellent
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, clean separation
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All segments clearly visible with good sizing and white strokes
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with distinct hues (blue, yellow, teal, coral)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good chart centering but legend slightly isolated on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for sunburst charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-organized, no grid needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sunburst chart with concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Hierarchy properly mapped: Department > Team > Project'
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 levels, segment angles proportional
+          to values'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows departments with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: sunburst-basic · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 departments, multiple teams and projects; demonstrates hierarchy
+          well but all leaf nodes similar size
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values ($K) are sensible, though range could be wider
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly linear but has some complex angle calculations (acceptable
+          for sunburst)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_arc, layering, and tooltips; could use more interactive
+          features
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/bokeh.yaml b/plots/sunburst-basic/metadata/bokeh.yaml
index 1896fa6ba9..f6500895ee 100644
--- a/plots/sunburst-basic/metadata/bokeh.yaml
+++ b/plots/sunburst-basic/metadata/bokeh.yaml
@@ -29,3 +29,172 @@ review:
     are adjacent
   - Level 1 labels use 3-character abbreviations that may not be immediately clear
     (Eng, Mar, Sal, Ope)
+  image_description: 'The plot displays a sunburst chart showing a company budget
+    hierarchy across three concentric rings. The innermost ring shows four departments:
+    Engineering (Eng, blue), Marketing (Mar, yellow), Sales (Sal, red), and Operations
+    (Ope, green). The middle ring shows teams within each department (Backend, Frontend,
+    DevOps, Digital, Content, Enterprise, SMB, Support, HR) in lighter shades of the
+    parent colors. The outermost ring shows projects (API, Database, Web App, Mobile,
+    Infrastructure, Social Media, SEO, Blog, Video, EMEA, APAC, Direct, Tier 1, Tier
+    2, Recruiting) in the lightest shades. A legend on the right shows the department
+    color gradients with budget totals. Ring level labels (Dept, Team, Project) appear
+    below the chart. The title "sunburst-basic · bokeh · pyplots.ai" is centered at
+    the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 32pt, labels are 14-22pt, all readable but some outer labels
+          are slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good separation via white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but red/green combination could be problematic
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent layout, chart fills canvas well, legend is properly positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for sunburst (no axes), but ring level labels are present
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is informative with gradient swatches and budget values
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sunburst chart with concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical data correctly mapped to rings (level_1→level_2→level_3)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 hierarchy levels, proportional segments,
+          clear separation'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, segments span correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows all departments with budget totals
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "sunburst-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchy well but some smaller segments lack labels
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is a realistic, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values are plausible but units (K) only shown in legend
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Structure is logical but uses nested loops which adds complexity
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses annular_wedge effectively but does not leverage Bokeh's interactive
+          features (hover tooltips would enhance this visualization)
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/highcharts.yaml b/plots/sunburst-basic/metadata/highcharts.yaml
index 12de2efc5d..239832daa1 100644
--- a/plots/sunburst-basic/metadata/highcharts.yaml
+++ b/plots/sunburst-basic/metadata/highcharts.yaml
@@ -24,3 +24,164 @@ review:
   - Some outer ring labels are difficult to read due to segment size and rotation
     angle
   - Could improve tooltip visibility documentation in code comments
+  image_description: 'The sunburst chart displays a hierarchical budget allocation
+    across three concentric rings. The innermost ring shows three departments: Engineering
+    (deep blue, ~55% of total), Marketing (golden yellow, ~25%), and Operations (purple,
+    ~20%). The middle ring shows teams within each department with brightness variations
+    of the parent color. The outer ring displays individual projects as the finest
+    granularity. The title "sunburst-basic · highcharts · pyplots.ai" appears at the
+    top with a subtitle "Budget Allocation by Department, Team, and Project". Labels
+    use circular rotation and white text with outline for readability. Colors are
+    distinct and colorblind-friendly (blue/yellow/purple palette).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Most text readable, some smaller segment labels slightly difficult
+          to read
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text thanks to filtering on small segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Segments well-sized, though some outer ring segments quite thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-safe palette (blue/yellow/purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well, good proportions
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed, legend implicit in hierarchy
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sunburst chart type with concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchy correctly shows parent→child relationships
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 hierarchy levels, proportional angles,
+          consistent branch colors'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Color consistency shows relationships
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: sunburst-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied segment sizes, 3 levels, different proportions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation scenario is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values in $K are sensible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sunburst module, levels configuration, colorVariation, allowDrillToNode,
+          but could leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/letsplot.yaml b/plots/sunburst-basic/metadata/letsplot.yaml
index c9b4924e4c..7de32e925b 100644
--- a/plots/sunburst-basic/metadata/letsplot.yaml
+++ b/plots/sunburst-basic/metadata/letsplot.yaml
@@ -27,3 +27,174 @@ review:
   - Could benefit from a simple legend explaining the color-branch relationship
   - Some segments (like SMB, Retail) appear relatively small; could show more variation
     in data
+  image_description: 'The plot displays a sunburst chart with three concentric rings
+    representing an organizational budget hierarchy. The innermost ring shows three
+    main departments: **Eng** (dark blue), **Sales** (yellow/gold), and **Mktg** (green)
+    - each with white bold text labels. The second ring shows subdivisions: Backend
+    and Frontend for Eng (lighter blues), North and South for Sales (lighter yellows),
+    and Digital and Brand for Mktg (lighter greens). The outermost ring displays projects/categories:
+    API, Database, Web App, Mobile (blues), Enterprise, SMB, Retail (yellows), and
+    SEO, Ads, Events (greens). White borders separate all segments clearly. The chart
+    is centered on a clean white background with the title "sunburst-basic · letsplot
+    · pyplots.ai" at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text is clearly readable; title is prominent, inner labels are
+          bold white, outer labels have good contrast
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels are well-positioned within segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments are well-sized with clear proportions and white separators
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good color scheme using blue/yellow/green branches with consistent
+          shading (-1: yellow text on light yellow segments could be improved)'
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Chart is well-centered but could use more canvas space (-1: some
+          empty space around chart)'
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for sunburst; title present and properly formatted
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (not strictly needed but could help clarify hierarchy)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sunburst chart with concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three hierarchy levels properly mapped to rings
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchical data, proportional segments, consistent branch
+          colors, clear level separation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All segments visible and properly proportioned
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels directly on segments (appropriate for sunburst)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "sunburst-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows 3 levels, varying segment sizes, multiple branches (-1: could
+          show more variation in leaf sizes)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Organizational budget breakdown is realistic and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values (6-18) are plausible for relative comparison
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: 'Uses helper function `create_wedge` (-2: violates KISS principle
+          of no functions)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_polygon and manual polygon construction; while creative,
+          doesn't leverage lets-plot's native pie/sector capabilities if available
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/matplotlib.yaml b/plots/sunburst-basic/metadata/matplotlib.yaml
index 336b58073b..e3247f12ed 100644
--- a/plots/sunburst-basic/metadata/matplotlib.yaml
+++ b/plots/sunburst-basic/metadata/matplotlib.yaml
@@ -26,3 +26,174 @@ review:
   - Could use more extreme value variation between segments to better demonstrate
     proportional sizing
   - White label color on inner ring may reduce contrast on lighter portions
+  image_description: 'The plot displays a sunburst chart with three concentric rings
+    showing a company budget breakdown. The innermost ring shows three departments:
+    Engineering (blue, #306998), Sales (yellow, #FFD43B), and Marketing (olive/gold).
+    The middle ring displays teams: Frontend, Backend, DevOps under Engineering; North,
+    South under Sales; Digital, Brand under Marketing. The outer ring shows individual
+    projects like Web App, Mobile, API, Database, Cloud, CI/CD, Enterprise, SMB, SEO,
+    Ads, Events, and Content. The chart uses Python branding colors as the base palette
+    with lighter variations for child levels. The title "Company Budget · sunburst-basic
+    · matplotlib · pyplots.ai" appears at the top. The chart fills the square canvas
+    well with proper spacing and white edge separators between segments.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, department labels are 18pt white bold, team labels
+          14pt, project labels 12pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; smaller segments appropriately skip labels
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Ring widths and segment sizes are well adapted; white edges provide
+          clear separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses blue/yellow/olive palette that is largely colorblind-friendly,
+          but some lighter variations may be difficult to distinguish
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square figure (12x12) fills canvas excellently; chart is centered
+          with balanced margins
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend needed for labeled segments, but could benefit from a legend
+          or annotation showing the value hierarchy
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sunburst chart with concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Hierarchy levels correctly mapped: Level 1 (departments) → Level
+          2 (teams) → Level 3 (projects)'
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 hierarchy levels, proportional angles,
+          consistent branch colors'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 projects, 6 teams, and 3 departments visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Labels directly on segments are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Company Budget · sunburst-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied hierarchy with different segment sizes, but all level-1
+          categories are roughly similar in size
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is a real, comprehensible scenario with
+          appropriate categories
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Budget values in $thousands are reasonable (60-200k per project),
+          though some variation could be more extreme
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → processing → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Deterministic data used (no random values), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib pie() API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pie() with wedgeprops for ring widths which is matplotlib's
+          approach to sunburst, but this is not a specialized sunburst function; it's
+          a manual construction using nested pie charts
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/plotly.yaml b/plots/sunburst-basic/metadata/plotly.yaml
index b61d2fa4fb..3d6d22818c 100644
--- a/plots/sunburst-basic/metadata/plotly.yaml
+++ b/plots/sunburst-basic/metadata/plotly.yaml
@@ -26,3 +26,164 @@ review:
     variation
   - Chart could be slightly larger to better utilize canvas space for maximum visual
     impact
+  image_description: 'The sunburst chart displays a company budget breakdown with
+    three hierarchical levels rendered as concentric rings. The innermost circle shows
+    "Company Budget" in dark Python Blue (#306998). The second ring displays four
+    departments: Engineering (Python Blue), Sales (Python Yellow #FFD43B), Marketing
+    (Light Blue), and Operations (Light Yellow). The outermost ring shows teams within
+    each department: Backend, Frontend, DevOps under Engineering; Enterprise, SMB
+    under Sales; Digital, Brand under Marketing; and HR, Finance under Operations.
+    Labels are displayed radially within each segment. The chart uses white line separators
+    between segments, with the title "sunburst-basic · plotly · pyplots.ai" centered
+    at the top. The overall layout is well-balanced with the chart centered on a white
+    background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text is readable; font size is adequate, though some smaller
+          segments have slightly compressed text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text, all labels fit within their segments
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: all segments are clearly visible with appropriate proportional sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: uses Python Blue/Yellow palette with variations, colorblind-friendly
+          blue-yellow scheme
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: chart is well-centered but could utilize slightly more canvas area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct sunburst chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: hierarchical levels correctly mapped (root → departments → teams)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'all spec features present: 3 hierarchy levels, proportional segment
+          angles, consistent colors within branches'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data segments visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: labels accurately represent data categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: correctly uses "sunburst-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows multi-level hierarchy with varying segment sizes; could benefit
+          from more variation in proportions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: company budget breakdown is an excellent real-world scenario matching
+          spec's application examples
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: budget values in millions are reasonable; some teams have similar
+          values reducing visual contrast
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save structure, no unnecessary functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only plotly.graph_objects is imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses go.Sunburst with branchvalues="total", custom hover template,
+          radial text orientation; could leverage more Plotly interactivity features
+          in HTML output
+  verdict: APPROVED
diff --git a/plots/sunburst-basic/metadata/seaborn.yaml b/plots/sunburst-basic/metadata/seaborn.yaml
index b345a0c54e..d2060761f0 100644
--- a/plots/sunburst-basic/metadata/seaborn.yaml
+++ b/plots/sunburst-basic/metadata/seaborn.yaml
@@ -30,3 +30,176 @@ review:
     with wedge edges
   - The bar chart colors do not match the sunburst branch colors (Engineering shows
     blue in sunburst but bar chart has different mapping due to hue parameter)
+  image_description: 'The plot displays a sunburst chart showing a company budget
+    breakdown across three hierarchical levels. The visualization consists of two
+    parts: (1) a sunburst/donut chart on the left with concentric rings - the innermost
+    ring shows three departments (Engineering in blue, Sales in yellow, Marketing
+    in lighter blue) with labels in white bold text. The middle ring displays teams
+    (Frontend, Backend, DevOps, North, South, Digital, Brand) in varying shades. The
+    outermost ring shows projects (Web App, Mobile, API, Database, Cloud, CI/CD, Enterprise,
+    SMB, SEO, Ads, Events, Content) in the lightest shades. The center displays "$1,530K
+    Total" in blue text. (2) A horizontal bar chart on the right titled "Department
+    Totals" showing Engineering ($700K), Sales ($500K), and Marketing ($330K) with
+    value labels. The title reads "Company Budget · sunburst-basic · seaborn · pyplots.ai"
+    at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable; center text, department labels, and bar labels
+          are clear. Some outer ring labels are slightly small.
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor crowding on smaller segments (CI/CD, Database labels near edges)
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Segments well-sized with good ring widths and clear separation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/Yellow/Light Blue palette is colorblind-friendly, no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of dual-panel layout, sunburst fills left side well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Bar chart has descriptive "Budget ($K)" label with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend for the sunburst (relies on inline labels); bar chart grid
+          is subtle
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct sunburst chart with concentric rings
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Level 1 (departments) innermost, Level 2 (teams) middle, Level 3
+          (projects) outer
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has hierarchical levels, proportional segments, consistent branch
+          colors. Labels present but no hover/interaction (static image limitation)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, segments properly sized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No explicit legend, but inline labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Company Budget · sunburst-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 3 hierarchy levels, varying segment sizes, parent-child relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Company budget breakdown is a perfect real-world use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values ($60K-$200K) are realistic for project/team allocations
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic, so no random seed is needed (no random data used)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses `hue` parameter correctly for modern seaborn, and `legend=False`
+          pattern is good
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of seaborn's `sns.set_theme()`, `sns.light_palette()`
+          for color gradients, and `sns.barplot()` for the summary chart
+  verdict: APPROVED
diff --git a/plots/surface-basic/metadata/altair.yaml b/plots/surface-basic/metadata/altair.yaml
index cb0f1c1f69..a423db7847 100644
--- a/plots/surface-basic/metadata/altair.yaml
+++ b/plots/surface-basic/metadata/altair.yaml
@@ -26,3 +26,163 @@ review:
     limitation)
   - Axis labeling could be clearer - X (projected) and Z (height) may confuse users
     unfamiliar with 3D projection
+  image_description: The plot displays a 3D surface projected onto 2D, representing
+    the mathematical function z = sin(x) * cos(y). The surface is rendered using rectangular
+    facets with the viridis colormap - yellow/bright green for peaks (~0.5 to 1.0)
+    and dark purple/blue for valleys (~-0.5 to -1.0). The title "surface-basic · altair
+    · pyplots.ai" appears at the top. The X-axis is labeled "X (projected)" ranging
+    from -5 to 5, and the Y-axis shows "Z (height)" from -2 to 2. A legend on the
+    right displays "Z Value" with the color scale. The surface shows characteristic
+    wave patterns of the sin*cos function with multiple peaks and valleys. Subtle
+    blue-gray grid lines and faint edges on the facets provide structure without overwhelming
+    the visualization.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: All text readable, font sizes appropriate (18pt labels, 22pt titles,
+          28pt main title)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Surface visible but rectangle approximation shows minor edge artifacts
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, surface fills plot area well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels descriptive but naming could be clearer for projected coordinates
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid (alpha=0.3), well-placed legend
+    spec_compliance:
+      score: 21
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 6
+        max: 8
+        passed: true
+        comment: Creative 3D surface approximation in a 2D library via projection
+      - id: SC-02
+        name: Data Mapping
+        score: 4
+        max: 5
+        passed: true
+        comment: X/Y/Z mapped correctly, projection adds complexity
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colormap, colorbar; pan/zoom replaces rotation appropriately
+          for 2D
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend title "Z Value" is accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "surface-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows complete sin*cos surface with peaks and valleys
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Classic mathematical function example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in [-1, 1] appropriate for sin*cos
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes, clean script structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/surface-basic/metadata/bokeh.yaml b/plots/surface-basic/metadata/bokeh.yaml
index 7b6ab94ff2..db49e2791f 100644
--- a/plots/surface-basic/metadata/bokeh.yaml
+++ b/plots/surface-basic/metadata/bokeh.yaml
@@ -24,3 +24,168 @@ review:
   - Axis labels are generic (X, Y, Z) without descriptive context or units
   - Colorbar title Z Value is truncated/rotated, could be improved
   - The HTML output is also generated which may not be necessary for static review
+  image_description: The plot displays a 3D surface rendered in 2D using an isometric
+    projection. The surface represents z = sin(x) * cos(y), showing smooth wave-like
+    undulations. The Viridis colormap is used, with yellow/green representing peaks
+    (z ≈ 0.5 to 1) and dark purple/blue representing valleys (z ≈ -1 to -0.5). The
+    title "surface-basic · bokeh · pyplots.ai" appears in the top-left corner. Axis
+    labels X, Y, and Z are placed around the plot in gray text. A vertical colorbar
+    on the right shows "Z Value" with tick marks from -0.5 to 0.5. The surface is
+    rendered with subtle blue (#306998) edge lines and the background is light gray
+    (#fafafa). The plot fills the canvas well with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and colorbar labels readable, axis labels slightly small but
+          acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Surface patches well-sized, subtle edge lines enhance 3D effect
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight imbalance with empty bottom-left
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels present but generic (X, Y, Z without units or context)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, colorbar present but title partially cut off
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D surface plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly assigned and projected
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Colorbar present, axis labels present, but no rotation (static image)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full surface visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Colorbar labels correct but generic
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "surface-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows peaks and valleys of sin*cos surface well
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function demo, plausible but generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values -1 to 1 appropriate for trigonometric function
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → projection → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed here)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of patches for 3D surface, painter's algorithm for depth
+          sorting, LinearColorMapper, ColorBar, custom text annotations
+  verdict: APPROVED
diff --git a/plots/surface-basic/metadata/letsplot.yaml b/plots/surface-basic/metadata/letsplot.yaml
index 206f14876f..ca8d2277d2 100644
--- a/plots/surface-basic/metadata/letsplot.yaml
+++ b/plots/surface-basic/metadata/letsplot.yaml
@@ -27,3 +27,178 @@ review:
     or note the projection in the label)
   - The Y-axis original dimension is lost in the final visualization (only X and Z
     are labeled)
+  image_description: The plot displays a 3D surface visualization of z = sin(x) *
+    cos(y) rendered as a 2D projection using filled polygons with the painter's algorithm.
+    The surface shows characteristic wavy peaks and valleys with a smooth viridis
+    colormap - yellow/green for positive Z values (peaks around +0.5) and purple/blue
+    for negative Z values (valleys around -0.5). The wireframe edges are visible in
+    a subtle blue color (#306998). The title "surface-basic · letsplot · pyplots.ai"
+    appears at the top. Axis labels show "X (projected)" on the horizontal axis and
+    "Z (height)" on the vertical axis. A colorbar legend labeled "Z Value" is positioned
+    on the right side. The surface appears properly rotated with elevation ~25° and
+    azimuth ~45°, giving good depth perception of the 3D structure.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Surface polygons are visible with good color differentiation; wireframe
+          lines are subtle but effective
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, slight imbalance with more whitespace on
+          right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X (projected)", "Z (height)") but lack proper
+          3D context units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed; no grid (appropriate for this 3D projection)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D surface plot type implemented via polygon projection
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y mapped correctly to create the 2D projection; Z encoded in both
+          height and color
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has colormap, colorbar, axis labels; interactive rotation not available
+          in lets-plot (static 2D library)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the plot area
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Z Value" colorbar accurately represents the surface height values'
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format but spacing around dots appears slightly inconsistent
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows peaks and valleys of the sine-cosine surface; demonstrates
+          positive and negative values well
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function z = sin(x) * cos(y) is a classic example for
+          3D surface visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Appropriate range (-3 to 3) showing multiple periods of the wave
+          function
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → projection → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed for deterministic
+          math)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of geom_polygon with ggplot grammar, scale_fill_viridis,
+          theme customization; impressive solution to render 3D in a 2D library
+  verdict: APPROVED
diff --git a/plots/surface-basic/metadata/matplotlib.yaml b/plots/surface-basic/metadata/matplotlib.yaml
index 80b92b5d59..ad3a49f006 100644
--- a/plots/surface-basic/metadata/matplotlib.yaml
+++ b/plots/surface-basic/metadata/matplotlib.yaml
@@ -24,3 +24,168 @@ review:
     units
   - Font sizes could be slightly larger for optimal 4800x2700 rendering (xlabel/ylabel
     at 18pt vs recommended 20pt)
+  image_description: The plot displays a 3D surface showing a radial ripple/wave pattern
+    centered at the origin. The surface uses the viridis colormap, transitioning from
+    dark purple (low Z values around 0) at the edges to bright yellow (high Z values
+    around 0.8) at the center peak. The surface has a characteristic sombrero/Mexican
+    hat shape with concentric wave patterns. The title "surface-basic · matplotlib
+    · pyplots.ai" is clearly displayed at the top. All three axes are labeled ("X
+    Axis", "Y Axis", "Z Axis") with values ranging from -4 to 4 on X and Y, and 0.0
+    to 0.8 on Z. A vertical colorbar on the right shows the "Z Value" scale. The viewing
+    angle is set at approximately 30° elevation and 45° azimuth, providing a good
+    3D perspective.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Slightly
+          smaller than optimal for 4800x2700 but still good.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Surface is well-rendered with good resolution (40x40 grid), smooth
+          antialiasing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis is an excellent colorblind-safe colormap
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though colorbar is slightly separated from main
+          plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Axis", "Y Axis", "Z Axis") but lack units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: 3D grid visible but could be more subtle; colorbar serves as legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D surface plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y grid correctly mapped to Z height values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: smooth colormap (viridis), axis labels,
+          colorbar'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show complete data range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar correctly labeled "Z Value"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "surface-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows smooth surface with interesting radial pattern, demonstrates
+          height variation well. Could show more complex features.
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function (sin * exp decay) is a classic demo. Plausible
+          but generic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are sensible (-4 to 4 range, 0 to 0.8 height)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic mathematical function (good), but no explicit
+          random seed comment for clarity
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/surface-basic/metadata/plotly.yaml b/plots/surface-basic/metadata/plotly.yaml
index 68bafff848..9024b6a1b3 100644
--- a/plots/surface-basic/metadata/plotly.yaml
+++ b/plots/surface-basic/metadata/plotly.yaml
@@ -24,3 +24,173 @@ review:
   - Axis labels are generic (X Axis, Y Axis) rather than contextual
   - Layout has more whitespace than optimal; surface could fill more of the canvas
   - np.random.seed(42) is included but unnecessary since the data generation is deterministic
+  image_description: The plot displays a 3D surface visualization with the title "surface-basic
+    · plotly · pyplots.ai" centered at the top. The surface shows a smooth mathematical
+    function with peaks (yellow, ~1.3) and valleys (purple, ~-0.8) created from combining
+    sinusoidal patterns. The Viridis colormap transitions from dark purple/blue at
+    low Z values to bright yellow/green at high values. The X and Y axes range from
+    -4 to 4, with "X Axis" and "Y Axis" labels. The Z axis shows "Z Value" ranging
+    from approximately -1 to 1.5. A vertical colorbar on the right displays the Z
+    Value scale. The surface is viewed from a 3D perspective with subtle gray grid
+    lines, and the overall layout uses a clean white background.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 20pt, tick fonts at 14-16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Surface is well-rendered with 40x40 grid providing smooth detail
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Viridis colormap is colorblind-safe and provides excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Plot centered but has significant whitespace around it; surface could
+          be larger
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Axis", "Y Axis", "Z Value") but lack units
+          or more meaningful context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at alpha 0.1 is appropriately subtle; colorbar well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D surface plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y grid correctly mapped to Z height values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: smooth surface, colormap, colorbar, axis
+          labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with appropriate range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colorbar accurately represents Z values
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Title follows exact format `{spec-id} · {library} · pyplots.ai`
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows peaks and valleys but mathematical function is abstract rather
+          than showcasing full range of surface features
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function demonstration is plausible but generic rather
+          than a real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values range appropriately for mathematical function visualization
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)` (though seed not used since data is deterministic)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png` and `plot.html`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Surface and colorbar, but does not leverage Plotly's interactive
+          features in the static output; HTML export included which is good
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/altair.yaml b/plots/survival-kaplan-meier/metadata/altair.yaml
index cab3a09aa0..188232b409 100644
--- a/plots/survival-kaplan-meier/metadata/altair.yaml
+++ b/plots/survival-kaplan-meier/metadata/altair.yaml
@@ -27,3 +27,180 @@ review:
     probability ceiling of 1.0
   - Legend placed inside plot area (upper-right) could overlap data in some scenarios;
     consider placing outside
+  image_description: The plot displays a Kaplan-Meier survival analysis comparing
+    two treatment groups over a 36-month follow-up period. Treatment A is shown in
+    blue (#306998) and Treatment B in yellow/gold (#FFD43B). Both survival curves
+    follow a step-function pattern starting at 1.0 survival probability and declining
+    over time. Treatment A (blue) shows better survival with approximately 40% survival
+    at 36 months, while Treatment B (yellow) drops to approximately 20% survival.
+    Shaded bands represent 95% confidence intervals around each curve, with appropriate
+    transparency. Small tick marks perpendicular to the curves indicate censored observations.
+    The title "survival-kaplan-meier · altair · pyplots.ai" appears at the top. The
+    y-axis shows "Survival Probability" (0.00-1.10) and x-axis shows "Time (Months)"
+    (0-40). A legend in the upper right identifies the two treatment groups. Grid
+    lines are subtle with dashed styling.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 22pt, tick labels at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step lines with strokeWidth=4 are clearly visible, CI bands at 0.25
+          opacity appropriately subtle
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow/gold are colorblind-safe (distinguishable under all
+          types)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, minor deduction for y-axis extending to
+          1.10 (unnecessary whitespace above 1.0)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Time (Months)" and "Survival Probability"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (0.3 opacity, dashed), but legend is placed in upper-right
+          corner overlapping with the plot area instead of outside
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Kaplan-Meier step-function survival plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time correctly on x-axis, survival probability on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes step function, 95% CI bands, censoring marks, group comparison
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range (0-38 months, 0-1.05+ probability)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Treatment A and Treatment B
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "survival-kaplan-meier · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows diverging survival curves, censoring events, confidence intervals
+          widening over time
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial with treatment comparison - neutral, scientifically
+          appropriate scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 36-month follow-up, 80 patients per group, realistic event rates
+          (65% vs 75%)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Code contains a function `kaplan_meier()` - violates KISS principle
+          (should be inline)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's layered chart composition with + operator, step-after
+          interpolation, area marks for CI bands, and tick marks for censoring. However,
+          could leverage Altair's tooltip interactivity or selection features more.
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/bokeh.yaml b/plots/survival-kaplan-meier/metadata/bokeh.yaml
index 9af7804cb6..e8a0416a56 100644
--- a/plots/survival-kaplan-meier/metadata/bokeh.yaml
+++ b/plots/survival-kaplan-meier/metadata/bokeh.yaml
@@ -26,3 +26,181 @@ review:
     font or better positioning
   - Code uses helper functions instead of flat KISS structure preferred by guidelines
   - Censoring tick marks could be slightly more prominent for better visibility
+  image_description: 'The plot displays a Kaplan-Meier survival curve comparing two
+    groups: Treatment (n=80) in blue (#306998) and Control (n=80) in yellow/gold (#FFD43B).
+    Both curves start at survival probability 1.0 and decrease in characteristic step-function
+    fashion over time (0-36 months). The treatment group shows better survival (higher
+    curve) than the control group throughout. Each curve has a semi-transparent confidence
+    interval band (shaded regions). Censored observations are marked with small vertical
+    tick marks on the curves. A horizontal dashed gray line at y=0.5 indicates the
+    median survival reference. The title follows the required format "survival-kaplan-meier
+    · bokeh · pyplots.ai". Axes are labeled "Time (months)" and "Survival Probability".
+    The legend is positioned in the bottom-left corner. Grid lines are subtle and
+    dashed.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all readable at full size.
+          Font sizes are appropriately scaled for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere in the plot.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step lines are clearly visible with good line width. Censoring marks
+          visible but could be slightly more prominent.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow/gold provide excellent contrast and are colorblind-safe
+          (not red-green).
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins. Good use of space.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Time (months)" includes units, "Survival Probability" is descriptive.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines and alpha. However, the legend is
+          very small and positioned in the bottom-left corner where it's somewhat
+          hard to read.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Kaplan-Meier step function survival curves.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, survival probability on Y-axis, correctly mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: step function curves, 95% CI bands, censored
+          observation marks, group comparison with legend, median survival line.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range (0-38 months, 0-1.05 probability).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Treatment and Control with sample sizes.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "survival-kaplan-meier · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows declining survival, group differences, censoring, confidence
+          intervals. Could show more dramatic separation or crossing curves for fuller
+          demonstration.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial with treatment vs control is a classic, neutral, realistic
+          scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Time scale (months) and sample sizes (n=80) are realistic. Survival
+          curves show plausible decline patterns.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Code uses helper functions (kaplan_meier, make_step_data) rather
+          than flat script structure.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, patch for CI bands, Span for median line,
+          scatter with custom markers for censoring. Good Bokeh usage but could leverage
+          HoverTool for interactivity.
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/highcharts.yaml b/plots/survival-kaplan-meier/metadata/highcharts.yaml
index 696309617f..a8e1e31f3e 100644
--- a/plots/survival-kaplan-meier/metadata/highcharts.yaml
+++ b/plots/survival-kaplan-meier/metadata/highcharts.yaml
@@ -25,3 +25,180 @@ review:
   - Legend positioned in middle-left area potentially overlapping with data visualization
     region
   - Censored markers could be slightly larger for better visibility at the high resolution
+  image_description: 'The plot displays a Kaplan-Meier survival analysis comparing
+    two treatment groups in a clinical trial. The title "survival-kaplan-meier · highcharts
+    · pyplots.ai" is prominently displayed at the top with a subtitle "Clinical Trial:
+    Survival Probability Over Time". Two step-function curves are shown: a blue line
+    for "Standard Treatment" and a yellow/gold line for "New Treatment". Each curve
+    has semi-transparent confidence interval bands (light blue and light yellow respectively).
+    The Y-axis shows "Survival Probability" ranging from 0 to 1, and the X-axis shows
+    "Time (Months)" from 0 to 36. The New Treatment (yellow) shows better survival
+    probability over time compared to Standard Treatment (blue). Small diamond-shaped
+    censored observation markers are visible on the curves. The legend is positioned
+    on the left side of the plot. Grid lines are subtle dashed lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step curves and CI bands are clearly visible; censored markers visible
+          but small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills most of the area; slight imbalance
+          with legend position
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Time (Months)", "Survival Probability"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle and appropriate, but legend is positioned awkwardly
+          in the middle-left area overlapping the 0.6 region of the plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Kaplan-Meier step function visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, survival probability on Y-axis correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function ✓, 95% CI bands ✓, censored markers ✓, group comparison
+          ✓, legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-36 month range and 0-1 probability range shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Standard Treatment and New Treatment
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "survival-kaplan-meier · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two groups with different survival rates, CI bands widening
+          over time, censored observations; could show more pronounced early divergence
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial comparing standard vs new treatment is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Weibull distributions produce realistic survival curves; 36-month
+          study period is appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, inline Kaplan-Meier calculation (fixed from
+          previous attempt)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png correctly but window-size in Chrome is slightly
+          off (4800,2900 vs container 4800,2700)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses LineSeries with step, AreaRangeSeries for CI bands, ScatterSeries
+          for censored marks; good use of Highcharts features but could leverage more
+          interactive features like tooltips with survival data
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/letsplot.yaml b/plots/survival-kaplan-meier/metadata/letsplot.yaml
index 69e816aea3..19199cb399 100644
--- a/plots/survival-kaplan-meier/metadata/letsplot.yaml
+++ b/plots/survival-kaplan-meier/metadata/letsplot.yaml
@@ -27,3 +27,179 @@ review:
   - Code uses helper functions instead of flat KISS structure required by code standards
   - Legend shows colored boxes instead of step-line representations
   - Missing optional median survival time annotation mentioned in spec notes
+  image_description: 'The plot displays a Kaplan-Meier survival curve comparing two
+    groups: Treatment (blue) and Control (red). The x-axis shows "Time (months)" ranging
+    from 0 to 36, and the y-axis shows "Survival Probability" from 0 to 1. Both curves
+    are rendered as step functions, starting at survival probability 1.0 and declining
+    over time. The Treatment group (blue) shows better survival outcomes, maintaining
+    higher probability throughout. Each curve has a shaded confidence interval band
+    (blue for Treatment, red for Control) that widens over time. Censored observations
+    are marked with plus (+) symbols along the curves. The legend is positioned on
+    the right side showing "Treatment" and "Control". The title correctly follows
+    the format "survival-kaplan-meier · letsplot · pyplots.ai". Grid lines are subtle
+    and the overall layout is clean with good use of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step curves and confidence bands are clearly visible; censoring marks
+          are visible but could be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and red are distinguishable but red-blue is not the most colorblind-safe
+          combination
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization with well-balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Time (months)", "Survival Probability"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid visible but legend shows boxes instead of lines/steps which
+          is slightly misleading for a step plot
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step function for Kaplan-Meier survival curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, survival probability on Y-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has step function, confidence intervals, censoring marks, group comparison
+          with legend; missing median survival annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-36 month range and 0-1 probability scale shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Treatment and Control groups
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but "letsplot" should be "lets-plot" per library
+          name
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear difference between groups, censoring events, widening
+          confidence intervals over time; could show more dramatic crossing or convergence
+          scenarios
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial survival data is a perfect, neutral, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 36-month follow-up, realistic hazard rates, appropriate sample sizes
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses multiple helper functions (generate_survival_data, kaplan_meier,
+          get_survival_at_time) instead of flat structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 2
+        max: 2
+        passed: true
+        comment: Saves both plot.png and plot.html correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of lets-plot ggplot2 grammar: geom_step, geom_ribbon,
+          geom_point with shape parameter, scale_color_manual, scale_fill_manual,
+          theme_minimal with custom element_text sizing'
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/matplotlib.yaml b/plots/survival-kaplan-meier/metadata/matplotlib.yaml
index ba4e2a0ac7..2a7fdab28c 100644
--- a/plots/survival-kaplan-meier/metadata/matplotlib.yaml
+++ b/plots/survival-kaplan-meier/metadata/matplotlib.yaml
@@ -28,3 +28,177 @@ review:
     curve overlap regions
   - Median text box positioning could be adjusted to avoid any potential overlap with
     axis labels
+  image_description: 'The plot displays a Kaplan-Meier survival analysis comparing
+    two groups: Treatment Group (blue, #306998) and Control Group (yellow/gold, #FFD43B).
+    Both curves are step functions starting at survival probability 1.0 and decreasing
+    over time (0-60 months). The Treatment Group shows better survival with a median
+    of 21.9 months vs Control Group median of 16.6 months. Each curve has a semi-transparent
+    95% confidence interval band. Vertical tick marks along the curves indicate censored
+    observations. Dotted vertical lines mark the median survival times, with a horizontal
+    dotted line at 0.5 probability. A text box in the bottom-right corner displays
+    the median values. The legend is positioned in the upper-right corner. Grid is
+    subtle with alpha=0.3 and dashed lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 16pt, legend at 16pt
+          - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step curves with linewidth=3, tick marks with s=400 are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow/gold are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well, slight issue with confidence
+          bands extending to edges
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Time (months)" and "Survival Probability" are descriptive with
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but legend is in upper right which could
+          potentially overlap with data in some scenarios; also median text box partially
+          overlaps with y-axis area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct step function Kaplan-Meier curves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X, survival probability on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step functions, 95% CI bands, censored tick marks, group comparison,
+          median annotations all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: 'X: 0-65 months, Y: 0-1.05 shows all data'
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Treatment Group and Control Group labels correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"survival-kaplan-meier · matplotlib · pyplots.ai" matches spec'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows two groups with different survival curves, censoring, CI widening
+          over time; could have shown more dramatic early separation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial survival data is a perfect real-world application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 60 months follow-up is realistic, but the exponential scale=24 for
+          treatment vs scale=16 for control could show clearer separation
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → K-M calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ax.step with where="post", fill_between with step="post",
+          scatter for tick marks, but no advanced matplotlib features like spines
+          customization or twin axes for at-risk table
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/plotly.yaml b/plots/survival-kaplan-meier/metadata/plotly.yaml
index d1970b4aa7..247021eee5 100644
--- a/plots/survival-kaplan-meier/metadata/plotly.yaml
+++ b/plots/survival-kaplan-meier/metadata/plotly.yaml
@@ -25,3 +25,182 @@ review:
     legend entry style
   - Code complexity is higher than typical KISS style due to manual KM calculation
     (though necessary without lifelines library)
+  image_description: 'The plot displays a Kaplan-Meier survival analysis with two
+    treatment groups. Treatment A (Experimental) is shown in blue (#306998) and Treatment
+    B (Standard Care) in yellow/gold (#FFD43B). Both curves start at 100% survival
+    probability at time 0 and decrease in characteristic step-function pattern over
+    time (0-50 months). The blue Treatment A curve shows better survival, ending around
+    37% at ~36 months, while the yellow Treatment B curve drops more steeply, reaching
+    ~15% at similar timepoints. Both curves have shaded confidence interval bands
+    - blue-tinted for A and yellow-tinted for B. Censored observations are marked
+    as vertical tick marks on/near the curves. The legend is positioned in the upper
+    right corner with clear labels. A median survival annotation box in the lower
+    left shows Treatment A: 22.4 months and Treatment B: 16.6 months. The title follows
+    the correct format. Axis labels show "Time (months)" and "Survival Probability"
+    with percentage formatting (0%-100%).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick fonts at 18pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlapping, all elements are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step curves are well-visible with width=4, censored tick marks visible
+          though some cluster at later times
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, distinct, and high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, plot fills ~60% with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Time (months)" and "Survival Probability" are descriptive with
+          units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is subtle (0.2), but the legend shows 4 items including
+          "Censored (A)" and "Censored (B)" which could be combined or simplified
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Kaplan-Meier step function survival plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, survival probability on Y-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function, confidence intervals, censored markers, group comparison,
+          median survival annotation - all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full range 0-50 months and 0-100% survival
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies treatment groups and censored observations
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "survival-kaplan-meier · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows both groups with different survival curves, confidence intervals
+          widening over time, censoring, median annotation - minor: the at-risk table
+          mentioned in spec notes is missing'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial comparing experimental drug vs standard care is a
+          realistic, neutral medical research scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 80 patients per group, 48-month follow-up, median survival 16-22
+          months are plausible, though exponential scale parameters are simplistic
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Follows imports→data→plot→save, but the KM calculation loops add
+          complexity (necessary for the algorithm)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Code uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with fill="toself" for CI bands, hovertemplate for
+          interactive tooltips, proper plotly_white template, write_html for interactive
+          version
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/plotnine.yaml b/plots/survival-kaplan-meier/metadata/plotnine.yaml
index b32917fecb..cf70ec7da5 100644
--- a/plots/survival-kaplan-meier/metadata/plotnine.yaml
+++ b/plots/survival-kaplan-meier/metadata/plotnine.yaml
@@ -26,3 +26,181 @@ review:
   - Code uses helper functions instead of flat KISS structure (imports → data → plot
     → save)
   - Legend position overlaps with data region
+  image_description: The plot displays a Kaplan-Meier survival curve comparing two
+    treatment groups. Treatment A is shown in blue (#306998) and Treatment B in yellow/gold
+    (#FFD43B). Both curves start at 100% survival probability at time 0 and decrease
+    over time using characteristic step functions. The blue Treatment A curve extends
+    to approximately 75 months with final survival around 20%, while the yellow Treatment
+    B curve ends around 35 months reaching close to 0% survival. Shaded confidence
+    interval bands surround each curve (blue and yellow respectively, with alpha transparency).
+    Vertical tick marks indicating censored observations are visible along both curves.
+    The title reads "survival-kaplan-meier · plotnine · pyplots.ai". X-axis shows
+    "Time (months)" ranging from 0-75, Y-axis shows "Survival Probability" with percentage
+    labels (0%, 25%, 50%, 75%, 100%). A legend in the upper right identifies both
+    treatment groups.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Step curves and confidence bands clearly visible, censored marks
+          visible but slightly thin
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe (distinguishable for all common
+          types)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, legend placement in upper right works well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has units "Time (months)", Y-axis descriptive "Survival Probability"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is very subtle (good), but legend overlaps slightly with data
+          region
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Kaplan-Meier step function survival plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X-axis, survival probability on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step functions, confidence intervals, censored marks, group comparison
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range from 0-100% survival and 0 to max time
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Treatment A and Treatment B
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "survival-kaplan-meier · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows divergent survival curves, confidence intervals widening over
+          time, censored observations; Treatment B reaches 0% showing full survival
+          curve behavior
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial comparing two treatments is a classic, neutral, realistic
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Time in months (0-75) is realistic; survival curves show plausible
+          progression though Treatment B hazard rate creates somewhat aggressive decline
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Code uses helper functions (generate_survival_data, kaplan_meier)
+          instead of flat script structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent use of plotnine''s grammar of graphics: ggplot() + geom_step()
+          + geom_ribbon() + geom_point() layered composition, scale_color_manual/scale_fill_manual,
+          theme customization'
+  verdict: APPROVED
diff --git a/plots/survival-kaplan-meier/metadata/seaborn.yaml b/plots/survival-kaplan-meier/metadata/seaborn.yaml
index ce6bb3c612..d0ac6928b5 100644
--- a/plots/survival-kaplan-meier/metadata/seaborn.yaml
+++ b/plots/survival-kaplan-meier/metadata/seaborn.yaml
@@ -30,3 +30,173 @@ review:
     calculations
   - Does not use any seaborn-specific plotting functions (only sns.set_style) - relies
     entirely on matplotlib for actual plotting
+  image_description: 'The plot displays two Kaplan-Meier survival curves comparing
+    Treatment (dark blue) and Control (yellow/gold) groups over a 36-month follow-up
+    period. Both curves start at 1.0 survival probability and step down over time,
+    with the Treatment group showing better survival (ending around 0.4) compared
+    to Control (ending around 0.25). Each curve has shaded confidence interval bands
+    (blue for treatment, yellow for control). Vertical tick marks along the curves
+    indicate censored observations. A horizontal dashed gray line at y=0.5 marks the
+    median survival reference. An annotation box in the upper right shows median survival
+    times: Treatment 22.4 months, Control 16.9 months. The legend is positioned in
+    the lower left. Title follows the pyplots.ai format. Grid is subtle with dashed
+    lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Step curves with linewidth=3 clearly visible, tick marks sized appropriately
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis missing units (should be "Survival Probability (proportion)"
+          or similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dashed grid at alpha 0.3, legend well-placed in lower left
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Kaplan-Meier step function
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on X, survival probability on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Step function, confidence intervals, censoring marks, group comparison,
+          median annotation
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 0-36 month range shown, Y-axis 0-1.05
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Treatment/Control labels correct
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"survival-kaplan-meier · seaborn · pyplots.ai" correct'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows declining survival, group differences, censoring, confidence
+          widening over time
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial with 150 patients, exponential survival model, appropriate
+          censoring
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 36-month follow-up, realistic survival proportions
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses function definition (kaplan_meier) instead of inline code
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Only uses sns.set_style(), actual plotting done with matplotlib ax.step(),
+          ax.fill_between(), ax.scatter()
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/altair.yaml b/plots/swarm-basic/metadata/altair.yaml
index 570f437b98..6f5e1a8763 100644
--- a/plots/swarm-basic/metadata/altair.yaml
+++ b/plots/swarm-basic/metadata/altair.yaml
@@ -26,3 +26,178 @@ review:
   - Blue (#306998) and teal (#4B8BBE) colors are quite similar and may cause confusion
     for some viewers
   - Outputs both PNG and HTML when specification should focus on PNG output only
+  image_description: 'The plot displays a basic swarm plot showing employee performance
+    scores (y-axis, ranging from ~20 to 110) across four departments: Engineering
+    (blue), Marketing (yellow), Sales (teal), and HR (gray). Points are horizontally
+    spread within each category to reveal distribution density while avoiding overlap.
+    Each department shows a distinct distribution pattern: Engineering has a tight
+    cluster around 75-85, Marketing shows wider spread with some low outliers, Sales
+    appears bimodal with scores clustering around both 60-70 and 80-85, and HR shows
+    moderate spread with visible outliers at both ends. Black diamond markers indicate
+    the mean for each department, with dashed horizontal lines extending from them.
+    The legend is positioned in the upper right. The title reads "swarm-basic · altair
+    · pyplots.ai" at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text clearly readable: title at 28pt, axis labels at 22pt, tick
+          labels at 18pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or labels
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Point size (180) appropriate for ~170 points, opacity 0.7 works well.
+          Slight overlap in dense areas but acceptable for swarm
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow/teal/gray palette is mostly colorblind-friendly but blue
+          vs teal may be difficult for some
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Performance Score", "Department") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle (alpha 0.3), legend well placed but slightly overlaps
+          with upper data points area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot with horizontal spread
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: individual points, horizontal spread,
+          mean markers'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately (25-105 scale)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly match department names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "swarm-basic · altair · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: shows tight distribution (Engineering), wide spread (Marketing),
+          bimodal (Sales), outliers (HR)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a realistic, relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 30-100 are realistic and sensible
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses loops and nested logic for jitter calculation; not pure flat
+          script
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) properly set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Outputs plot.png and plot.html (should be just plot.png for review)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's layered chart composition, tooltips, and declarative
+          encoding well. However, the manual jitter calculation is a workaround rather
+          than using a native Altair approach
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/bokeh.yaml b/plots/swarm-basic/metadata/bokeh.yaml
index 75b4746564..94b77254b4 100644
--- a/plots/swarm-basic/metadata/bokeh.yaml
+++ b/plots/swarm-basic/metadata/bokeh.yaml
@@ -24,3 +24,180 @@ review:
   weaknesses:
   - No legend explaining what the horizontal black lines represent (mean markers)
   - Swarm algorithm is implemented manually rather than using a library feature
+  image_description: 'The plot displays a swarm (beeswarm) plot showing employee performance
+    scores across 4 departments: Engineering (blue points, ~45 observations clustered
+    around 80), Marketing (yellow points, ~38 observations with wider spread around
+    75), Sales (green points, ~52 observations showing bimodal distribution with clusters
+    around 65 and 88), and HR (purple points, ~35 observations around 78 with visible
+    outliers at ~45 and ~98). Each department has a black horizontal line indicating
+    the mean value. Points are horizontally jittered to avoid overlap, revealing the
+    full distribution shape. The background is light gray (#fafafa), with subtle horizontal
+    grid lines. Title "swarm-basic · bokeh · pyplots.ai" is centered at the top. Y-axis
+    shows "Performance Score (points)" ranging from ~30-100, X-axis shows "Department"
+    with category labels.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 26pt, tick labels at 18-20pt - all
+          clearly readable, though category labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, swarm jitter effectively separates
+          points
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Point size=18 with alpha=0.7 works well for ~170 total points; some
+          minor overlap in dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, purple palette is colorblind-friendly (no red-green
+          distinction)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, though data is concentrated in upper portion
+          (60-100 range)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Performance Score (points)", X-axis has descriptive
+          "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), but no legend explaining mean
+          markers
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot with horizontal jitter to avoid overlap
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, continuous values on Y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Includes mean markers as suggested in spec notes, proper point sizing
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 25-105 shows all data points comfortably
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Mean markers present but not labeled in a legend
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "swarm-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows different distribution shapes: tight (Engineering), wide (Marketing),
+          bimodal (Sales), outliers (HR) - excellent variety'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 30-100 are realistic, group sizes 35-52 appropriate
+          for swarm
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → jitter calculation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (numpy, bokeh.io, bokeh.models, bokeh.plotting)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves plot.png and plot.html but strict=True in zip could cause issues
+          in edge cases
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, figure customization, but swarm algorithm
+          is manual numpy rather than a Bokeh-specific feature
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/highcharts.yaml b/plots/swarm-basic/metadata/highcharts.yaml
index f075b558b1..00b6cf908a 100644
--- a/plots/swarm-basic/metadata/highcharts.yaml
+++ b/plots/swarm-basic/metadata/highcharts.yaml
@@ -27,3 +27,184 @@ review:
   - Helper function for swarm positions deviates from KISS principle (though functionally
     necessary)
   - Axis labels lack units (e.g., Performance Score 0-100 would be more informative)
+  image_description: 'The plot displays a swarm plot showing employee performance
+    scores across four departments: Engineering (blue), Marketing (yellow), Sales
+    (purple), and Operations (cyan). Each department has data points spread horizontally
+    to avoid overlap, forming characteristic swarm/beeswarm shapes. The title "swarm-basic
+    · highcharts · pyplots.ai" appears at the top in bold black text, with a subtitle
+    "Employee Performance Scores by Department" below. Red diamond markers indicate
+    the mean for each category. The y-axis shows "Performance Score" ranging from
+    ~34 to 106, and the x-axis shows "Department" with category labels. The background
+    is white with subtle gray dashed grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable with appropriate font sizes
+          (72px title, 48px axis titles, 36px labels). Slightly reduced because y-axis
+          tick labels could be larger for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Points spread nicely to avoid overlap, which is the core feature
+          of a swarm plot. Text elements do not overlap.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (radius 14) and visible. The swarm algorithm
+          works well, though some categories have slightly more overlap than ideal
+          due to high point density in certain score ranges.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses colorblind-safe palette (#306998 blue, #FFD43B yellow, #9467BD
+          purple, #17BECF cyan). No red-green combinations.'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space. Plot fills appropriate area with balanced
+          margins. Chart dimensions set to 4800x2700.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Performance Score" and "Department" are descriptive but lack units
+          (the label could have been "Performance Score (0-100)" for full marks).'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with dashed lines at 0.1 opacity. However, the legend
+          appears to be cut off or not visible in the rendered image.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot implementation with horizontal jitter.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, continuous values on y-axis, correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has individual points, swarm layout, mean markers. Spec suggested
+          "subtle mean or median marker" - implemented with diamond markers, but they
+          could be more subtle.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range from ~35-105, capturing all data points.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend items defined correctly for each category and mean.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "swarm-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows different distribution shapes: Engineering is bimodal (main
+          cluster + high performers), Marketing has wider spread, Sales shows clear
+          bimodal distribution, Operations has tight cluster. Could show one more
+          extreme outlier.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a realistic, comprehensible
+          scenario matching the spec's applications.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores in 40-100 range are realistic for employee metrics.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has a helper function `compute_swarm_positions` which technically
+          violates KISS, but it's necessary for the swarm algorithm. Minor deduction.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts scatter series, custom markers with borders, tooltips
+          with formatting. Could have used more interactive features like zoom, data
+          grouping, or custom events.
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/letsplot.yaml b/plots/swarm-basic/metadata/letsplot.yaml
index b03d79eab2..c988feb5b2 100644
--- a/plots/swarm-basic/metadata/letsplot.yaml
+++ b/plots/swarm-basic/metadata/letsplot.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - Axis label Performance Score could include units or scale indication like Performance
     Score (0-100)
+  image_description: 'The plot displays a swarm (sina) plot showing Performance Scores
+    across four departments: Engineering (blue), Marketing (yellow), Sales (green),
+    and Support (red/crimson). Individual data points are spread horizontally to avoid
+    overlap, creating the characteristic beeswarm pattern. Each department shows a
+    different distribution shape - Engineering has a tight cluster around 80, Marketing
+    shows wider spread with some low outliers, Sales displays a bimodal pattern with
+    clusters around 65 and 90, and Support has a moderate spread with visible outliers
+    at the low end (around 45-48). Black horizontal crossbars indicate the mean score
+    for each department. The title correctly reads "swarm-basic · letsplot · pyplots.ai".
+    The plot uses a clean minimal theme with subtle gray horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at appropriate
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; data points spread nicely using sina algorithm
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=4) with good alpha (0.7); density is
+          appropriate for ~175 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, red) provide good contrast
+          and differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Performance Score" and "Department" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend hidden (not needed as x-axis
+          labels identify categories)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot using geom_sina
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category on X, continuous value on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual points shown, horizontal spreading to avoid overlap, mean
+          markers present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; categories clearly labeled on x-axis
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "swarm-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: Engineering (tight normal), Marketing (wide spread
+          with outliers), Sales (bimodal distribution), Support (outliers visible)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a perfect real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 0-100 are realistic; group sizes (38-52) appropriate
+          for swarm plots
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) and geom_sina seed=42
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: geom_sina is the lets-plot equivalent of swarm/beeswarm, uses scale_color_manual/scale_fill_manual,
+          theme customization, crossbar for means
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/matplotlib.yaml b/plots/swarm-basic/metadata/matplotlib.yaml
index 7562c28a17..bdb81d480b 100644
--- a/plots/swarm-basic/metadata/matplotlib.yaml
+++ b/plots/swarm-basic/metadata/matplotlib.yaml
@@ -23,3 +23,177 @@ review:
   - Legend is redundant - departments already labeled on x-axis, only Mean marker
     needs explanation
   - For ~190 data points, marker size s=150 causes some visual density in centers
+  image_description: 'The plot displays a swarm plot (beeswarm plot) showing employee
+    performance scores across 4 departments: Engineering (blue), Sales (yellow), Marketing
+    (green), and Support (orange/coral). Individual data points are spread horizontally
+    to avoid overlap, clearly showing the distribution shape for each category. Each
+    department has 40-55 points with different distributions - Marketing clusters
+    highest (~80), Engineering mid-high (~78), Sales spread (~72 with outliers down
+    to ~33), and Support lower (~68). Diamond markers show the mean for each group.
+    The title follows the required format "swarm-basic · matplotlib · pyplots.ai".
+    Axis labels are "Department" and "Performance Score". A legend in the upper right
+    identifies categories and the mean marker. Grid lines are subtle (y-axis only).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Points spread properly via swarm algorithm, no text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Marker size s=150 with alpha=0.7 is appropriate for ~190 points total;
+          slight overlap in dense regions but acceptable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, coral palette is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but lack units (Performance Score could be
+          "Performance Score (0-100)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle with alpha=0.3, but legend includes redundant department
+          entries (already shown on x-axis)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot with horizontal point spread
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has individual points, swarm positioning, mean markers as suggested
+          in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full range 25-105, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies mean marker
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "swarm-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (tight Marketing, spread Sales with
+          outliers), but could show more distinct patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a realistic, relatable
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 0-100 are reasonable, but some clipping at boundaries
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" but in the current directory, not a specified path
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Custom swarm positioning algorithm is clever but matplotlib doesn't
+          have a native swarm plot function - this is a workaround using scatter().
+          Seaborn would be the natural choice here. The implementation correctly uses
+          ax methods over plt.
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/plotly.yaml b/plots/swarm-basic/metadata/plotly.yaml
index 501d23e70d..ed143e97b1 100644
--- a/plots/swarm-basic/metadata/plotly.yaml
+++ b/plots/swarm-basic/metadata/plotly.yaml
@@ -27,3 +27,177 @@ review:
     or advanced interactivity beyond basic hover
   - Axis labels lack units (could be "Test Score (points)" or similar)
   - Legend placement has slight isolation from the main plot area
+  image_description: 'The plot displays a swarm plot showing student test scores across
+    4 classrooms (Room A through Room D). Each classroom has its own color: Room A
+    in dark blue (#306998), Room B in yellow (#FFD43B), Room C in light blue (#5A9BD4),
+    and Room D in orange (#E07B39). Individual data points are displayed as circular
+    markers with dark outlines, spread horizontally to avoid overlap. Each classroom
+    includes a white diamond marker indicating the mean score. The title "swarm-basic
+    · plotly · pyplots.ai" is centered at the top. The x-axis shows "Classroom" with
+    category labels, and the y-axis shows "Test Score" ranging from approximately
+    40-100. A legend on the right identifies each classroom color. The plot uses a
+    clean white template with subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis titles at 24pt, tick labels at 20pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, categories well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized at 14px with good opacity (0.8) and visible outlines.
+          Slight overlap in dense areas but acceptable for swarm plot
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses blue, yellow, light blue, orange palette - distinguishable for
+          most colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space, plot fills majority of area. Minor issue:
+          legend slightly isolated on right'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels ("Test Score", "Classroom") but no units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle gridlines (alpha 0.05-0.1), legend well placed and clear
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot with horizontal point spreading
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, continuous values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows individual points, horizontal spreading to avoid overlap, mean
+          markers as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range [35, 105] shows all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four classrooms
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "swarm-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: Room A shows bimodal (main cluster + high performers),
+          Room B has wide spread with outliers, Room C is bimodal (two clusters),
+          Room D has tight clustering - demonstrates various distribution shapes'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Student test scores by classroom is plausible and relatable, though
+          somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores 40-95 are realistic for classroom assessments
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implements beeswarm algorithm manually instead of using plotly's
+          built-in jitter or a violin plot overlay. Does not leverage plotly's distinctive
+          interactive features in an interesting way
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/plotnine.yaml b/plots/swarm-basic/metadata/plotnine.yaml
index e85c1d3b9b..09c03b150c 100644
--- a/plots/swarm-basic/metadata/plotnine.yaml
+++ b/plots/swarm-basic/metadata/plotnine.yaml
@@ -22,3 +22,178 @@ review:
   - Clean, readable code following KISS principles
   weaknesses:
   - Grid lines could be more subtle (currently visible but acceptable)
+  image_description: 'The plot displays a swarm-style visualization with four treatment
+    groups (Placebo, Low Dose, Medium Dose, High Dose) on the x-axis and Biomarker
+    Level (ng/mL) on the y-axis ranging from ~20 to ~90. Each group uses a distinct
+    color: teal/green for Placebo, orange for Low Dose, purple/violet for Medium Dose,
+    and pink/magenta for High Dose. Individual data points are jittered horizontally
+    to show distribution. Dark blue diamond-shaped median markers are prominently
+    displayed for each group. The title follows the correct format: ''swarm-basic
+    · plotnine · pyplots.ai''. The background uses a minimal theme with subtle grid
+    lines. The plot clearly shows an upward trend in biomarker levels from Placebo
+    through High Dose groups.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis titles at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements well separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points sized appropriately (size=3.5), alpha=0.75 works well for
+          ~190 points; slightly smaller than ideal
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-friendly palette (teal, orange, purple, pink) with
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, plot fills canvas well; minor whitespace on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis includes units 'Biomarker Level (ng/mL)', X-axis 'Treatment
+          Group'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend shown (legend_position='none'), which is fine since colors
+          distinguish categories
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm style using jittered points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual points, spread to avoid overlap, median markers included
+          as spec suggested
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden as colors are self-explanatory with category
+          labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: ''swarm-basic · plotnine · pyplots.ai'''
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions (widths, centers), dose-response relationship
+          visible; could have more varied spreads
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Clinical trial biomarker data is highly plausible and meaningful
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values 20-100 ng/mL reasonable for biomarkers; slightly compressed
+          range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) and position_jitter(random_state=42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: stat_summary with fun_y parameter works but may show deprecation
+          warnings
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar (aes, geom_point, stat_summary, theme), position_jitter,
+          scale_color_manual; could leverage more plotnine-specific features
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/pygal.yaml b/plots/swarm-basic/metadata/pygal.yaml
index 2f37f0aafe..fb0c8c58e1 100644
--- a/plots/swarm-basic/metadata/pygal.yaml
+++ b/plots/swarm-basic/metadata/pygal.yaml
@@ -24,3 +24,179 @@ review:
   - Legend shows duplicate entries for categories and Mean - could be cleaner
   - Y-axis range extends to 120 but no data points above ~115, slight wasted space
     at top
+  image_description: 'The plot displays a swarm/beeswarm plot showing employee performance
+    scores (y-axis, ranging from 30-120) across four departments (x-axis): Engineering,
+    Marketing, Sales, and Operations. Each department has its data points displayed
+    in a distinct color: Engineering in blue (#306998), Marketing in yellow (#FFD43B),
+    Sales in green (#4CAF50), and Operations in orange (#FF5722). The points are spread
+    horizontally within each category column to avoid overlap, creating the characteristic
+    "beeswarm" pattern. Each category also has a subtle horizontal line indicating
+    the mean value. The title "swarm-basic · pygal · pyplots.ai" appears at the top.
+    A legend at the bottom shows all four department names plus "Mean". The background
+    is white with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size with appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; beeswarm algorithm successfully spreads points
+          to minimize data overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Dot size (12) is appropriate for the data density (~190 points total),
+          points are clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, orange) provide good contrast
+          and are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas, though y-axis extends to 120 with no data above
+          ~115
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Department" and "Performance Score" are descriptive labels'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend has redundant entries (categories listed twice via data series
+          AND legend labels)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a swarm/beeswarm plot with horizontal spreading
+          algorithm
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, continuous values on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Individual points visible, horizontal spreading to avoid overlap,
+          mean markers included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within the axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "swarm-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows different distributions (Engineering tight, Sales wide spread),
+          though distributions could show more variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores by department is a realistic, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores in 40-115 range are realistic for 0-100 type metrics
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → style → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducible data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and pygal imports, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Creative use of XY chart with custom beeswarm algorithm; pygal does
+          not have native swarm plot support, so this is a good workaround using stroke=False
+          for scatter-like points
+  verdict: APPROVED
diff --git a/plots/swarm-basic/metadata/seaborn.yaml b/plots/swarm-basic/metadata/seaborn.yaml
index 9d92d43c67..35cfbe9baa 100644
--- a/plots/swarm-basic/metadata/seaborn.yaml
+++ b/plots/swarm-basic/metadata/seaborn.yaml
@@ -24,3 +24,177 @@ review:
   - Axis labels could include units (e.g., "Performance Score (points)" or "Performance
     Score (0-100)")
   - Could leverage more seaborn features like sns.set_theme() for consistent styling
+  image_description: 'The plot displays a swarm plot showing employee performance
+    scores across four departments: Engineering (blue), Marketing (yellow), Sales
+    (teal), and Support (coral/salmon). Each department has its data points spread
+    horizontally in a beeswarm pattern to avoid overlap, clearly showing the distribution
+    shape. The title "swarm-basic · seaborn · pyplots.ai" is at the top. The y-axis
+    shows "Performance Score" ranging from about 30 to 100, and the x-axis shows "Department"
+    with the four category labels. Each department has a dark diamond marker indicating
+    the median value. A legend in the upper right identifies the diamond as "Median".
+    The grid is subtle with dashed horizontal lines. The Engineering group shows a
+    tight cluster around 75-90, Marketing has a wider spread with some points below
+    50, Sales shows a bimodal pattern with clusters around 60-70 and 80-90, and Support
+    shows outliers both high (~95) and low (~45-48).'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, swarm algorithm effectively spreads points
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Point size of 8 with alpha 0.8 is well-suited for ~175 total points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, teal, coral) are colorblind-distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Performance Score", "Department") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend is well-placed but could be positioned
+          better to not overlap potential data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct swarm/beeswarm plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, continuous values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has median markers as suggested in spec, color distinguishes categories
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (25-105) shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies median marker
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows diverse distributions: tight (Engineering), wide (Marketing),
+          bimodal (Sales), with outliers (Support)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores across departments is a real, relatable
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Performance scores 30-100 are realistic, sample sizes (38-52 per
+          dept) are appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Using hue with legend=False is correct for seaborn 0.14+, but `strict=True`
+          in zip is unnecessarily explicit
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses sns.swarmplot which is seaborn's distinctive feature, but doesn't
+          leverage additional seaborn strengths like set_theme() or statistical annotations
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/altair.yaml b/plots/ternary-basic/metadata/altair.yaml
index 9b6e461a3c..7b0dd57b29 100644
--- a/plots/ternary-basic/metadata/altair.yaml
+++ b/plots/ternary-basic/metadata/altair.yaml
@@ -24,3 +24,175 @@ review:
   weaknesses:
   - Tick labels on left and right edges could be slightly larger for better readability
     at smaller display sizes
+  image_description: The plot displays an equilateral triangle representing a ternary
+    diagram for soil composition. The triangle has three vertices labeled "Sand (100%)",
+    "Silt (100%)", and "Clay (100%)" in bold dark text. Inside the triangle, 50 blue
+    circular data points (#306998 color) are scattered showing different soil sample
+    compositions. Dashed gray grid lines run parallel to each edge at 20%, 40%, 60%,
+    and 80% intervals, creating a readable reference grid. Tick marks with numerical
+    labels (20, 40, 60, 80) appear along each edge. The title "Soil Composition ·
+    ternary-basic · altair · pyplots.ai" appears at the top in a clean sans-serif
+    font. The overall layout is clean with good use of whitespace and the triangle
+    is well-centered on the canvas.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt is excellent, vertex labels at 22pt are bold and clear,
+          tick labels at 14pt are readable but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, all labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points at size=300 with alpha 0.8 are well-suited for 50 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998 blue) is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, triangle is well-centered, minor whitespace
+          imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Vertex labels include component names with percentage notation
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle with dashed lines and alpha 0.3; no legend needed
+          for single-color points
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary plot with equilateral triangle
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Components correctly mapped to vertices using proper ternary-to-Cartesian
+          transformation
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines at 20% intervals, vertex labels, tick marks, distinct
+          markers all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within triangle bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (single series, no legend needed)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Soil Composition · ternary-basic · altair · pyplots.ai" follows
+          correct format'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Good spread of points across the triangle showing various compositions,
+          though somewhat clustered in center
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil composition (sand/silt/clay) is a classic ternary plot application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, values are realistic for soil samples
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API used
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct) but VQ-07 scored as 0
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's declarative layer composition, mark_rule for grid lines,
+          tooltips for interactivity, and proper encoding types
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/bokeh.yaml b/plots/ternary-basic/metadata/bokeh.yaml
index 99ba63bb97..a41d63700f 100644
--- a/plots/ternary-basic/metadata/bokeh.yaml
+++ b/plots/ternary-basic/metadata/bokeh.yaml
@@ -15,3 +15,14 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: 'The plot displays an equilateral triangle representing a ternary
+    diagram for soil composition with three components: Sand (bottom-left vertex),
+    Silt (bottom-right vertex), and Clay (top vertex). The triangle has a black outline
+    with gray grid lines at 20% intervals creating a mesh pattern inside. Approximately
+    50 blue circular data points (Python Blue #306998) with darker blue outlines are
+    scattered across the triangle, representing different soil samples. Tick labels
+    (0, 20, 40, 60, 80, 100) are placed along each edge. The title reads "Soil Composition
+    · ternary-basic · bokeh · pyplots.ai" at the top center. The vertex labels are
+    bold and clearly positioned outside the triangle. The overall layout is clean
+    with good whitespace margins.'
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/highcharts.yaml b/plots/ternary-basic/metadata/highcharts.yaml
index aad4b27964..c435b652e6 100644
--- a/plots/ternary-basic/metadata/highcharts.yaml
+++ b/plots/ternary-basic/metadata/highcharts.yaml
@@ -24,3 +24,167 @@ review:
   - Helper function ternary_to_cartesian violates KISS principle (should be inline
     calculation)
   - Legend is disabled when it could display the Soil Samples series name
+  image_description: 'The plot displays a ternary diagram for soil composition with
+    three components: Sand (top vertex), Silt (bottom-left), and Clay (bottom-right).
+    An equilateral triangle contains ~50 blue circular markers with transparency (rgba
+    blue #306998), representing soil samples. Grid lines at 20% intervals with percentage
+    tick labels (20%, 40%, 60%, 80%) along each edge. The title "Soil Composition
+    · ternary-basic · highcharts · pyplots.ai" is prominently displayed. Clean white
+    background with well-balanced margins.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title clear at 72px, vertex labels at 48px readable, tick labels
+          adequate
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: All text elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized for 50 points with appropriate alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Triangle well-centered with good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Vertices labeled with 100%, tick marks show percentages
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid subtle but legend disabled when it could show series name
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary plot with equilateral triangle
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three components correctly mapped to vertices
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has grid, vertex labels, tick marks; could have axis direction indicators
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within triangle
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: false
+        comment: Legend disabled, series name not visible
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format with spec-id · library · pyplots.ai
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Points distributed across triangle showing varied compositions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil composition is classic ternary application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values sum to 100%, realistic proportions
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Contains helper function `ternary_to_cartesian`, violates no-functions
+          rule
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses container.screenshot() approach
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts renderer API for custom triangle/grid
+          drawing, interactive tooltips with custom data
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/letsplot.yaml b/plots/ternary-basic/metadata/letsplot.yaml
index 4e38795ecf..ac61c19cf7 100644
--- a/plots/ternary-basic/metadata/letsplot.yaml
+++ b/plots/ternary-basic/metadata/letsplot.yaml
@@ -22,3 +22,176 @@ review:
   - Title format includes extra text before the required format
   - Legend position could be closer to the plot to reduce empty space
   - Tick label font size could be slightly larger for better readability at full resolution
+  image_description: 'The plot displays a ternary diagram in the form of an equilateral
+    triangle representing soil composition data. The three vertices are labeled "Sand
+    (%)" (bottom-left), "Silt (%)" (bottom-right), and "Clay (%)" (top), all in bold
+    black text. Grid lines are drawn at 20% intervals (20, 40, 60, 80) with subtle
+    gray lines creating a readable reference system. Tick labels appear along each
+    edge. Data points are shown as colored circles: blue (Sandy soils clustered in
+    lower-left), yellow (Silty soils clustered in lower-right), and red (Clayey soils
+    clustered toward the top). A legend titled "Soil Type" on the right side identifies
+    the three categories. The title "Soil Composition · ternary-basic · letsplot ·
+    pyplots.ai" appears at the top-left.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and vertex labels are bold and readable; tick labels are slightly
+          small but legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with good alpha; some clustering makes individual
+          points harder to distinguish
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and red are distinguishable and colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas; triangle is well-proportioned but some empty
+          space on the right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Vertex labels include units "(%)"; descriptive component names
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (good alpha); legend is placed well but slightly far
+          from plot
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary plot with equilateral triangle
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three components correctly mapped to triangle coordinates
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines at 20% intervals, vertex labels, tick marks, distinct
+          markers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within triangle bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies three soil types
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title includes spec-id, library, pyplots.ai but adds "Soil Composition"
+          prefix (minor deviation)
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows three distinct clusters representing different soil compositions;
+          could show more overlap/transition zones
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil classification is a classic real-world application of ternary
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, realistic ranges for soil types
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → coordinate conversion → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but with path="." parameter (works but unconventional)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar, geom layers, theme customization; could leverage
+          more interactive features
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/matplotlib.yaml b/plots/ternary-basic/metadata/matplotlib.yaml
index 62e86c8b2b..24887e8be3 100644
--- a/plots/ternary-basic/metadata/matplotlib.yaml
+++ b/plots/ternary-basic/metadata/matplotlib.yaml
@@ -23,3 +23,170 @@ review:
     to show more edge/corner cases
   - Layout uses 16:9 aspect ratio but triangle is inherently more square, causing
     some wasted horizontal space
+  image_description: 'The plot displays a ternary diagram in the form of an equilateral
+    triangle. The title "Soil Composition · ternary-basic · matplotlib · pyplots.ai"
+    appears at the top. The three vertices are labeled with bold text: "Sand (%)"
+    at the top, "Silt (%)" at the bottom-left, and "Clay (%)" at the bottom-right.
+    The triangle has a solid black outline with dashed gray grid lines at 20% intervals.
+    Tick labels (0, 20, 40, 60, 80, 100) run along each edge. Approximately 50 blue
+    data points with white edges are scattered throughout the triangle interior, representing
+    soil composition samples. The blue color is a pleasant #306998 with alpha transparency.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, vertex labels at 20pt bold, tick labels at 14pt -
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (s=200) with alpha=0.7 for 50 points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color with white edge, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good triangle placement, but aspect ratio forces some whitespace
+          on sides
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All three components labeled with units (%)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriate, but no legend present (single series so acceptable)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary/triangle plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sand, Silt, Clay correctly mapped to vertices
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines at 20%, vertex labels, tick marks all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within triangle
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single series (full points)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows `{description} · {spec-id} · {library} · pyplots.ai` format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Good spread of points using Dirichlet distribution, but most points
+          cluster in center
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil composition with sand/silt/clay is classic real-world ternary
+          application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: All values sum to 100%, realistic soil proportions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ax methods, manual coordinate transformation, but could
+          leverage matplotlib's polygon/patch features more
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/plotly.yaml b/plots/ternary-basic/metadata/plotly.yaml
index ef81b087be..d24ac06bc4 100644
--- a/plots/ternary-basic/metadata/plotly.yaml
+++ b/plots/ternary-basic/metadata/plotly.yaml
@@ -26,3 +26,176 @@ review:
   - Library features underutilized - could add color encoding by a fourth variable
     or use marker size variation
   - Slight marker overlap in dense central regions of the triangle
+  image_description: 'The plot displays a ternary diagram showing soil composition
+    data with three components: Sand (%), Silt (%), and Clay (%). The equilateral
+    triangle has the title "ternary-basic · plotly · pyplots.ai" centered at the top
+    in large, readable font. Blue markers (#306998) with white outlines and 0.7 opacity
+    are distributed across the triangle, representing 50 soil samples. Each vertex
+    is labeled with its component name and percentage units. Grid lines appear at
+    20% intervals with subtle gray color (0.2 opacity). The tick labels show values
+    from 0-100 at 20% increments. The white background provides clean contrast, and
+    markers are well-sized for the data density.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48pt, axis labels at 32pt, tick labels at 22pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized at 18 with appropriate opacity for 50 points (-1
+          for slightly large markers causing minor overlaps in dense areas)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, colorblind-safe blue
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good margins, triangle well-centered, slight excess whitespace on
+          sides (-1)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All axes labeled with component names and units: "Sand (%)", "Silt
+          (%)", "Clay (%)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid subtle at 0.2 opacity, but no legend shown (-1, though single-category
+          scatter doesn't strictly need legend)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary plot type using Scatterternary
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three components correctly mapped to a, b, c axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines at 20% intervals, clear vertex labels, distinct markers
+          with appropriate size
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within triangle, components sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-category scatter
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "ternary-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows distribution across triangle, but data is randomly distributed
+          without showing meaningful clusters or patterns (-2)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil composition (sand, silt, clay) is a classic, real-world ternary
+          application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sum to 100%, appropriate for compositional data (-1 for purely
+          random distribution)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions (4800x2700 via scale=3)
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses Scatterternary and hovertemplate for interactivity, but could
+          leverage more Plotly features like colorscale or animation
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/plotnine.yaml b/plots/ternary-basic/metadata/plotnine.yaml
index 0f198a5567..56dc7a7636 100644
--- a/plots/ternary-basic/metadata/plotnine.yaml
+++ b/plots/ternary-basic/metadata/plotnine.yaml
@@ -24,3 +24,174 @@ review:
   weaknesses:
   - 'Title format incorrect: uses "Soil Composition · ternary-basic · plotnine · pyplots.ai"
     instead of the required "{spec-id} · {library} · pyplots.ai" format'
+  image_description: The plot displays a ternary diagram showing soil composition
+    data (sand, silt, clay percentages). The triangle has a dark blue (#306998) border
+    with a white fill. Gray grid lines are drawn at 20% intervals inside the triangle.
+    Approximately 50 blue data points are scattered throughout the triangle, showing
+    varied soil compositions with clusters near each vertex (sand-heavy, silt-heavy,
+    and clay-heavy samples). The three vertices are labeled "Sand (%)", "Silt (%)",
+    and "Clay (%)" in bold blue text. Tick labels (0, 20, 40, 60, 80, 100) appear
+    along all three edges in gray. The title reads "Soil Composition · ternary-basic
+    · plotnine · pyplots.ai" in bold black text at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and vertex labels are clear and readable; tick labels slightly
+          small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Data points are well-sized with good alpha for the 50-point dataset
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, triangle is centered but could fill slightly
+          more vertical space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All three components labeled with units (%)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; no legend needed for single-color
+          plot
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary/triangular plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Components correctly mapped to ternary coordinates
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines at 20% intervals, labeled vertices, tick marks on edges
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within triangle bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series plot
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Title includes "Soil Composition" prefix instead of just "{spec-id}
+          · {library} · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Good spread using Dirichlet distribution with three clusters; shows
+          varied compositions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil composition (sand/silt/clay) is a classic real-world ternary
+          data example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, realistic soil proportions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine's grammar of graphics (ggplot + geom_polygon
+          + geom_segment + geom_point + geom_text + theme_void), but ternary plots
+          require manual coordinate transformation rather than native plotnine features
+  verdict: APPROVED
diff --git a/plots/ternary-basic/metadata/seaborn.yaml b/plots/ternary-basic/metadata/seaborn.yaml
index da197514b8..8779d73d8c 100644
--- a/plots/ternary-basic/metadata/seaborn.yaml
+++ b/plots/ternary-basic/metadata/seaborn.yaml
@@ -29,3 +29,178 @@ review:
   - Grid legend not present (minor issue for single-series data)
   - Square figure (12x12) instead of recommended 16:9 landscape, though appropriate
     for ternary plots
+  image_description: 'The plot displays a ternary diagram showing soil composition
+    data. The equilateral triangle has three vertices labeled in bold: "Sand (100%)"
+    at bottom-left, "Silt (100%)" at bottom-right, and "Clay (100%)" at the top. The
+    triangle is outlined in black with gray grid lines at 10% intervals creating a
+    mesh pattern inside. Tick marks with percentage labels (20%, 40%, 60%, 80%) appear
+    along all three edges. Approximately 50 blue circular data points (with white
+    edge and alpha transparency) are scattered across the triangle, representing different
+    soil compositions. The title "Soil Composition · ternary-basic · seaborn · pyplots.ai"
+    appears at the top. The background is white with a clean, professional appearance.
+    The plot uses a square aspect ratio with good utilization of the canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, vertex labels at 20pt bold, tick labels at 14pt -
+          all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers sized appropriately (s=200) with alpha=0.7 for 50 points,
+          white edges help distinguish overlapping points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast, no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas, triangle well-centered with balanced margins,
+          slight extra whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for ternary plot, but vertex labels with "(100%)" effectively
+          communicate the scale
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend present (not strictly needed
+          for single-series data)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct ternary plot with equilateral triangle
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Three components correctly mapped to triangle coordinates
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines at 10% intervals, labeled vertices, tick marks at 20%
+          intervals, visually distinct points
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within triangle
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend, but data context clear from title and labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "Soil Composition · ternary-basic · seaborn ·
+          pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 50 points spread across the triangle showing good compositional variety,
+          covers most regions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Soil composition (Sand, Silt, Clay) is a classic and realistic ternary
+          plot application
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages sum to 100, values generated via Dirichlet distribution
+          are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (plt, np, pd, sns)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot and sns.set_style/set_context, but ternary plotting
+          is primarily matplotlib-based custom work
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/altair.yaml b/plots/timeline-basic/metadata/altair.yaml
index 160c8b58ae..85af491cc2 100644
--- a/plots/timeline-basic/metadata/altair.yaml
+++ b/plots/timeline-basic/metadata/altair.yaml
@@ -23,3 +23,175 @@ review:
   - Event labels at 16pt fontSize could be slightly larger (18pt) for better readability
   - Inline legend positioned close to top event labels, could use more vertical separation
   - Could add .interactive() to enable zoom/pan for exploring dense timelines
+  image_description: 'The plot displays a horizontal timeline visualization for software
+    project milestones spanning January 2024 to December 2024. A central gray horizontal
+    axis line runs through the middle of the chart. Events are represented as colored
+    circular markers connected to the axis by vertical lines, with labels alternating
+    above and below the axis to prevent overlap. The color scheme uses four distinct
+    colors for categories: blue (#306998) for Planning, yellow/gold (#E5A000) for
+    Development, teal (#4ECDC4) for Testing, and coral/salmon (#E8575A) for Release.
+    The inline legend at the top shows all four categories with their corresponding
+    colored circles. Event labels are displayed in dark gray bold text. The x-axis
+    shows dates in "Mon YYYY" format with labels angled at -45 degrees. The title
+    "timeline-basic · altair · pyplots.ai" appears at the top center.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 18pt, event labels at 16pt - all readable,
+          slightly below optimal for event labels
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent alternating label positions prevent any text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers sized at 600 with white stroke, connectors at strokeWidth=3,
+          all highly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/teal/coral palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight excess whitespace at bottom due to y-scale
+          range
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has "Date" label but no units; Y-axis appropriately disabled
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Inline legend is creative but positioned very close to top event
+          labels; grid disabled which is appropriate
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on X-axis, events as labels, categories for color-coding
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation, alternating labels, color-coding by category,
+          clear date formatting
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year of project milestones visible, all data shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: All four categories correctly labeled and colored
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "timeline-basic · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows 12 events across 4 categories, demonstrates full timeline capability
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project milestones is a neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates span a realistic project timeline, event names are appropriate
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart layers → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 0
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but data is hardcoded
+          which is fine
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair 5.x API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses layered chart composition, tooltips, encoding types, but could
+          leverage more interactive features
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/bokeh.yaml b/plots/timeline-basic/metadata/bokeh.yaml
index db4510f156..d8548d96ca 100644
--- a/plots/timeline-basic/metadata/bokeh.yaml
+++ b/plots/timeline-basic/metadata/bokeh.yaml
@@ -24,3 +24,180 @@ review:
     full resolution
   - Legend positioned far from data area in top-right corner
   - Could benefit from Bokeh-specific features like HoverTool for interactivity
+  image_description: 'The plot displays a horizontal timeline visualization showing
+    10 software project milestones from January 2024 to October 2024. A central blue
+    horizontal line serves as the timeline axis. Events are represented by colored
+    circular markers (size ~35) positioned alternately above and below the axis with
+    vertical connector lines. The 5 project phases are color-coded using Category10
+    palette: Planning (blue), Design (orange), Development (green), Testing (red),
+    and Release (purple). Event labels appear above/below their respective markers.
+    The title "timeline-basic · bokeh · pyplots.ai" is displayed in blue (#306998)
+    at the top center. A legend titled "Phase" in the top-right shows all categories.
+    The x-axis displays dates with slight rotation, labeled "Date". Background is
+    light gray (#fafafa) with dashed vertical grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 32pt, axis labels 24pt, tick labels 18pt, event labels 18pt
+          - all clearly readable, slightly below optimal for event labels
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating above/below positioning prevents text overlap excellently
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers size 35 with alpha 0.9, connector lines visible, well-adapted
+          for 10 events
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Category10 palette is generally colorblind-friendly, but red/green
+          could be challenging for some
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, timeline centered, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Date" label present but no units (though dates are self-explanatory
+          for timeline)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is dashed with alpha 0.3 (good), legend well-styled but slightly
+          far from data
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, events as labeled markers, category for color-coding
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation, alternating labels, color-coding by category,
+          clear date formatting
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full date range visible with padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows all 5 phases with correct colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeline-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple phases, varying time gaps, 10 events - demonstrates
+          timeline well but could show more varied spacing
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project milestones is an excellent, neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates span 9 months which is realistic for a software project, events
+          well-distributed
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → figure → plot → style → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded dates), but no explicit seed for
+          any random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (pandas, bokeh.io, bokeh.models, bokeh.palettes,
+          bokeh.plotting)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Label model, figure with datetime axis, legend
+          configuration - good Bokeh usage but could leverage more interactive features
+          or tooltips
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/highcharts.yaml b/plots/timeline-basic/metadata/highcharts.yaml
index a59ddb2ec8..63770fc92f 100644
--- a/plots/timeline-basic/metadata/highcharts.yaml
+++ b/plots/timeline-basic/metadata/highcharts.yaml
@@ -26,3 +26,184 @@ review:
   - Missing explicit axis labels (y-axis is conceptual for timeline but VQ-06 criteria
     expects labels)
   - Slight excess bottom margin creates minor whitespace imbalance
+  image_description: 'The plot displays a horizontal timeline visualization showing
+    10 software development project milestones for 2024. Events are represented as
+    colored circular markers positioned alternately above and below a central horizontal
+    axis line. The title "timeline-basic · highcharts · pyplots.ai" appears at the
+    top in bold, with a subtitle "Software Development Project Milestones 2024" below.
+    A horizontal legend at the top shows 5 color-coded categories: Planning (blue
+    #306998), Design (yellow #FFD43B), Development (purple #9467BD), Testing (cyan
+    #17BECF), and Release (brown #8C564B). Event labels are displayed near each marker
+    with bold text. The x-axis spans from Dec 2023 to Feb 2025 with monthly tick marks.
+    The background is white and the layout is clean with good use of space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, labels, and axis text all clearly readable at full
+          resolution. Font sizes are appropriately scaled for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; alternating label positions above/below axis
+          prevents collision.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are large (radius 28) with white borders, clearly visible
+          against white background.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used; no red-green conflicts. Uses recommended
+          palette from library rules.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with balanced margins. Slight excess whitespace
+          at bottom.
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels present (acceptable for timeline where x-axis is self-explanatory
+          as dates, but y-axis concept is implicit).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at top, no distracting grid (appropriate for timeline).
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization using scatter plot as base with horizontal
+          temporal axis.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Events correctly mapped to x-axis (dates) with alternating y positions
+          for readability.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: temporal axis, event markers, labels,
+          category color-coding, alternating positions.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axis shows all data with appropriate padding (Dec 2023 to Feb 2025
+          for 2024 events).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 categories with matching colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeline-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple categories, chronological progression, varied event
+          spacing. Could include more variety in event density.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project milestones is a realistic, neutral,
+          professional scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates are realistic for a software project timeline. Slightly compressed
+          11-month timeline.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot config → render →
+          save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses fixed datetime values, no random data - fully deterministic.
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: All imports used, but Path is imported just for cleanup (minor).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts and Selenium APIs.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly, but also saves plot.html (acceptable
+          for interactive library).
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Uses Highcharts-specific features: datetime x-axis, plotLines for
+          central axis, per-point dataLabels configuration, interactive tooltips,
+          responsive HTML version.'
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/letsplot.yaml b/plots/timeline-basic/metadata/letsplot.yaml
index 4658405287..9136a69b9a 100644
--- a/plots/timeline-basic/metadata/letsplot.yaml
+++ b/plots/timeline-basic/metadata/letsplot.yaml
@@ -27,3 +27,177 @@ review:
     at full resolution
   - X-axis label 2024 is unconventional - consider moving year to title or using a
     more descriptive label
+  image_description: 'The plot displays a horizontal timeline visualization for a
+    software project spanning January to December 2024. A dark gray horizontal line
+    serves as the main axis at y=0. Ten events are shown as colored circular markers
+    connected to the axis by vertical segments. Events alternate above and below the
+    axis to prevent label overlap. The title "timeline-basic · letsplot · pyplots.ai"
+    appears at the top left in bold. Event labels (e.g., "Project Kickoff", "Requirements
+    Done", "Design Review", etc.) are positioned above or below their respective markers.
+    A legend on the right shows five color-coded phases: Planning (blue), Design (yellow),
+    Development (green), Testing (red), and Release (purple). The x-axis shows month
+    abbreviations (Jan-Dec) with "2024" as the axis label. The overall layout is clean
+    with a light/white background and subtle minimal theming.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is bold at 24pt, axis text at 16pt, labels readable but could
+          be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating label positions work well, no text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Points are well-sized (size=6), connectors visible with good alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color palette, but red/green for Testing/Development could be
+          problematic for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis labeled "2024" which is descriptive but not a typical axis
+          label format
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed, but no grid lines (appropriate for timeline)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on x-axis, events as labeled points
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has date, event labels, categories with color-coding, alternating
+          positions
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year shown with padding, all events visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 phases
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "timeline-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 10 events across 5 categories, good variety, but events are
+          evenly spaced which is somewhat artificial
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project milestones is a realistic and neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates are realistic for a year-long project, though some phases could
+          have more events
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (fixed dates, no randomness)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" and "plot.html" but path="." may cause issues
+          in workflow context
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar properly with geom_segment, geom_point, geom_text,
+          scale_color_manual, theme_minimal. Good use of layering but does not leverage
+          lets-plot interactive features in the static output.
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/matplotlib.yaml b/plots/timeline-basic/metadata/matplotlib.yaml
index f24fdbcc71..9d9edc0ce9 100644
--- a/plots/timeline-basic/metadata/matplotlib.yaml
+++ b/plots/timeline-basic/metadata/matplotlib.yaml
@@ -23,3 +23,168 @@ review:
     clarity
   - Event spacing is relatively uniform - could demonstrate more varied intervals
     between milestones
+  image_description: 'The plot displays a horizontal timeline showing 8 software project
+    milestones spanning from January 2024 to December 2024. Events are represented
+    as colored circular markers connected to a central gray horizontal axis line by
+    vertical stems. Labels alternate above and below the axis to prevent overlap,
+    with each label in a white rounded box bordered by the category color. The title
+    "timeline-basic · matplotlib · pyplots.ai" appears at the top in bold black text.
+    A legend in the upper right shows four project phases: Planning (blue), Development
+    (yellow), Testing (teal), and Release (coral). Date labels (e.g., "Jan 15", "Mar
+    01") appear near each marker, and the x-axis shows month/year labels from Jan
+    2024 to Jan 2025.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, event labels 14pt bold with white background
+          boxes, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating above/below positions prevent any text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are s=300 with white edge, stems 2.5 linewidth, excellent
+          visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, teal, coral palette is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, slight margin asymmetry but overall balanced
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: Timeline has no traditional axis labels (y-axis hidden, which is
+          appropriate for timelines)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for timeline), legend well-placed in upper right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates correctly mapped to horizontal axis, events as labeled markers
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal orientation, alternating labels, color-coding by category
+          all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Timeline shows full range with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all four project phases with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "timeline-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 8 events across 4 categories, good variety but could show more
+          variation in event spacing
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project milestones is a realistic, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Year-long project timeline is realistic, though some phases could
+          have more varied durations
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, matplotlib.dates, numpy,
+          pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300 and bbox_inches='tight'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/plotly.yaml b/plots/timeline-basic/metadata/plotly.yaml
index f70b5247e6..701f21d26d 100644
--- a/plots/timeline-basic/metadata/plotly.yaml
+++ b/plots/timeline-basic/metadata/plotly.yaml
@@ -24,3 +24,181 @@ review:
   - Vertical grid lines could be more subtle or removed for cleaner appearance
   - Could leverage Plotly interactivity more (e.g., range slider for zooming dense
     timelines)
+  image_description: 'The plot displays a horizontal timeline visualization of a software
+    project spanning January to December 2024. Twelve milestones are represented as
+    colored circular markers along a dark horizontal axis line. Events are alternately
+    positioned above and below the axis to prevent text overlap. Four categories are
+    distinguished by color: Planning (blue - #306998), Development (yellow - #FFD43B),
+    Testing (teal - #4ECDC4), and Release (red-orange - #E74C3C). Each milestone has
+    its event name label and a smaller date label (e.g., "Jan 15"). The title "timeline-basic
+    · plotly · pyplots.ai" appears at the top center, with a horizontal legend showing
+    all four categories below it. The x-axis shows months from January to December
+    with "Project Timeline (2024)" as the axis title. Dotted vertical lines connect
+    each marker to its labels. The overall design is clean with a white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, labels at 16pt, tick fonts at 18pt. All text is readable,
+          though date labels are slightly small.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Excellent use of alternating positions above/below axis prevents
+          all text overlap.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers are appropriately sized (size=20) with white borders for
+          visibility.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, but yellow on white background has lower contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis has descriptive title "Project Timeline (2024)" but lacks
+          units context.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed, but vertical grid lines are visible and could
+          be more subtle.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates correctly mapped to x-axis, events displayed as markers with
+          labels.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal orientation, alternating label
+          positions, category color-coding.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Timeline shows full year span with padding on both ends.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all four categories with accurate colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "timeline-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 12 events across 4 categories throughout the year. Could include
+          varied event durations or importance levels.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project milestones is a realistic, neutral scenario appropriate
+          for business contexts.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates are realistic for a software project, though 12 milestones
+          in one year is dense.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → figure → traces → annotations
+          → layout → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded dates), no random elements, but no
+          explicit seed comment.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pandas and plotly.graph_objects used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses graph_objects with annotations and hover templates. Could leverage
+          more Plotly interactivity features like animations or range sliders.
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/plotnine.yaml b/plots/timeline-basic/metadata/plotnine.yaml
index de704b17bb..a99d0623b3 100644
--- a/plots/timeline-basic/metadata/plotnine.yaml
+++ b/plots/timeline-basic/metadata/plotnine.yaml
@@ -22,3 +22,184 @@ review:
   - Event spacing is very uniform (roughly monthly) - more varied spacing would better
     demonstrate timeline capabilities
   - Axis label Date could be more descriptive (e.g., Project Timeline 2024)
+  image_description: 'The plot displays a horizontal timeline showing 12 software
+    development project milestones spanning from January 2024 to December 2024. Events
+    are represented as colored circular markers positioned alternately above and below
+    a central horizontal axis line, with vertical connector lines linking each marker
+    to the axis. Labels for each event (e.g., "Project Kickoff", "Requirements Complete",
+    "Production Launch") are positioned above or below their respective markers in
+    alternating fashion to prevent overlap. The timeline uses four distinct colors
+    to represent different project phases: blue (#306998) for Planning, yellow (#FFD43B)
+    for Development, orange (#E69F00) for Testing, and green (#009E73) for Release.
+    The x-axis shows quarterly date markers from 2024-01-01 to 2025-01-01. A legend
+    at the bottom identifies the four phases. The title "timeline-basic · plotnine
+    · pyplots.ai" appears at the top center.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (~24pt), axis labels are readable (~20pt),
+          event labels and tick marks are appropriately sized (~16pt)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating label positions above/below axis effectively prevents
+          all text overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (size=8), connector lines visible; minor deduction
+          as some markers could be slightly larger for emphasis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, orange, green) with good
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though vertical space above/below timeline
+          could be utilized slightly better
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X-axis labeled "Date" which is descriptive but lacks format specification
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom; vertical grid lines are subtle (alpha=0.3),
+          but the vertical connector lines from points could be mistaken for grid
+          elements
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates correctly mapped to x-axis, events displayed as markers with
+          labels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal orientation, alternating labels,
+          color-coding by category, clear date formatting'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 events visible, axis extends appropriately with padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all four phases
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeline-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good variety of phases across the year; could have shown more
+          varied event spacing to demonstrate timeline flexibility
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software development project milestones is a perfect, neutral, realistic
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 12 events over 12 months is reasonable; dates are realistic but very
+          evenly spaced (roughly monthly)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → processing → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded dates), but no explicit seed comment
+          for clarity
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with geom_segment, geom_point, geom_text,
+          and theming; could leverage more plotnine-specific features like faceting
+          or statistical transformations
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/pygal.yaml b/plots/timeline-basic/metadata/pygal.yaml
index 15efa054c2..e88d7575bd 100644
--- a/plots/timeline-basic/metadata/pygal.yaml
+++ b/plots/timeline-basic/metadata/pygal.yaml
@@ -23,3 +23,174 @@ review:
   - Vertical canvas utilization could be improved - timeline occupies narrow horizontal
     band with large empty areas above and below
   - Legend placement at bottom left is distant from the actual data points
+  image_description: 'The plot displays a horizontal timeline visualization of software
+    project milestones from January to November 2024. Eight events are shown as colored
+    dots along a horizontal axis with month labels (Jan through Nov). Events are alternated
+    above and below the central timeline: above are "Project Kickoff" (Jan 15), "Architecture
+    Design" (Mar 20), "Alpha Release" (Jun 15), and "User Acceptance" (Sep 10); below
+    are "Requirements Complete" (Feb 10), "Development Start" (Apr 25), "Beta Release"
+    (Aug 01), and "Production Launch" (Oct 20). Each event is labeled with its name
+    and date. Colors represent categories: blue for Planning, yellow for Design, teal
+    for Development, red/coral for Testing, and light blue for Deployment. A legend
+    at the bottom left shows all five categories. The title reads "Software Project
+    Milestones · timeline-basic · pygal · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and event labels are readable; month labels and legend text
+          are clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating above/below layout prevents overlap; all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Dots are visible and well-sized; good use of the timeline axis
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors that are distinguishable; no red-green confusion
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Timeline is horizontally centered; large whitespace above and below
+          timeline
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Month (2024)" x-axis label is descriptive with context'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is present but placed far from the data at bottom left
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline/event visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates mapped to X axis, events displayed with labels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Events, dates, categories, alternating positions all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full year timeline visible, all events shown
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 categories with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Software Project Milestones · timeline-basic ·
+          pygal · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 8 events across 5 categories; demonstrates alternating layout
+          well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project lifecycle is an excellent, neutral, real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Dates and project phases are realistic; timeline spans appropriate
+          duration
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → chart config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data with fixed dates
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart, custom Style, SVG manipulation for timeline axis;
+          good use of pygal's data labeling
+  verdict: APPROVED
diff --git a/plots/timeline-basic/metadata/seaborn.yaml b/plots/timeline-basic/metadata/seaborn.yaml
index a47350c6f0..ce7aa75951 100644
--- a/plots/timeline-basic/metadata/seaborn.yaml
+++ b/plots/timeline-basic/metadata/seaborn.yaml
@@ -25,3 +25,178 @@ review:
   - Could leverage more seaborn-specific features; most visualization logic uses matplotlib
     directly
   - Red and green colors used together may cause issues for red-green colorblind users
+  image_description: 'The plot displays a horizontal timeline visualization for a
+    software development project spanning January 2024 to March 2025. Nine milestones
+    are shown as large circular markers on a gray horizontal axis. Labels alternate
+    above and below the axis (e.g., "Project Kickoff" and "Architecture Design" above;
+    "Requirements Done" and "UI Mockups" below) connected by vertical colored lines.
+    Five project phases are color-coded: blue (Planning), yellow (Design), green (Development),
+    red (Testing), and purple (Deployment). The legend appears at the bottom center
+    with all five phase categories in a horizontal row. Date labels on the x-axis
+    are rotated 45 degrees showing bi-monthly intervals. The title "timeline-basic
+    · seaborn · pyplots.ai" is displayed at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, event labels at 15pt bold, tick labels at 16pt - all
+          clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Alternating label positions above/below axis successfully prevents
+          all overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Large markers (s=500) with white edge perfectly visible, connector
+          lines clearly distinguish events
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, though red/green combination could be problematic
+          for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, timeline centered with balanced margins,
+          slight extra padding on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No axis labels (acceptable for timeline where x-axis dates are self-explanatory,
+          but technically no labels)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for timeline), legend well-placed at bottom
+          with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct timeline visualization with events along temporal axis
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Dates on x-axis, events as labeled markers, categories as colors
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has date, event labels, category color-coding, alternating positions
+          as spec recommends
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full date range visible with appropriate padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 phases with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeline-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows multiple categories, chronological progression, varied spacing
+          between events; could show more clustering scenarios
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Software project milestones is an excellent, neutral, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reasonable timeline span (~1 year), though some milestone spacing
+          is quite even
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic (hardcoded dates), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: matplotlib, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with proper hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.scatterplot with hue/palette correctly, but timeline is
+          primarily matplotlib-based with seaborn for the scatter points only
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/altair.yaml b/plots/timeseries-decomposition/metadata/altair.yaml
index e8203e972f..736e1f8ab2 100644
--- a/plots/timeseries-decomposition/metadata/altair.yaml
+++ b/plots/timeseries-decomposition/metadata/altair.yaml
@@ -25,3 +25,179 @@ review:
   - Missing light grid lines that specification explicitly requests
   - Y-axis labels all say generic Value rather than being component-specific
   - Could leverage Altair interactivity features like .interactive() or tooltips
+  image_description: 'The plot displays a time series decomposition visualization
+    with four vertically stacked line charts sharing a common x-axis (Date, spanning
+    2018-2025). Each subplot shows a distinct component: **Original** (blue line)
+    shows the raw time series data with visible trend and seasonal fluctuations ranging
+    from ~75 to ~200; **Trend** (red line) displays a smooth upward linear trend from
+    ~100 to ~170; **Seasonal** (teal line) shows a perfect repeating annual sinusoidal
+    pattern oscillating between approximately -30 and +30; **Residual** (purple line)
+    shows random noise fluctuating around 0 within a range of roughly -20 to +20.
+    The title "timeseries-decomposition · altair · pyplots.ai" appears at the top.
+    Component names are displayed as bold labels on the left side of each subplot.
+    Font sizes appear adequate for readability.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 16-20pt are readable; tick labels could
+          be slightly larger but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels angled at -45° prevent
+          overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line thickness of 2.5 is optimal for the data density; all components
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, red, teal, purple) are colorblind-safe
+          and have good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas with faceted layout; slightly excessive whitespace
+          between subplots
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Date" and "Value" are descriptive but lack units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No visible grid lines (spec requested light grid); legend appropriately
+          hidden since component names are facet labels
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with 4 vertically stacked subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, values on y-axis, correctly mapped for all components
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: All four components displayed; missing light grid lines as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with independent y-scales per subplot
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Component names clearly labeled via facet headers
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but middle dot separator could be more prominent
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows trend, seasonality, and noise effectively; 96 monthly points
+          (8 years) covers multiple cycles; decomposition shows characteristic patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Airline passengers scenario is a classic, neutral, and realistic
+          time series example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values 100-200 for passengers are realistic; seasonal amplitude and
+          residual scale are sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → decomposition → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for consistent results
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: altair, numpy, pandas, statsmodels'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Altair distinctive features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of faceting with `row=` encoding, declarative color mapping,
+          and independent y-scales via `resolve_scale`. Could have added interactivity
+          with `.interactive()` or tooltips.
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/bokeh.yaml b/plots/timeseries-decomposition/metadata/bokeh.yaml
index 561ce3885d..6a1b677d9b 100644
--- a/plots/timeseries-decomposition/metadata/bokeh.yaml
+++ b/plots/timeseries-decomposition/metadata/bokeh.yaml
@@ -26,3 +26,178 @@ review:
   - Y-axis label rotation on Passengers (thousands) makes it slightly harder to read
   - Panel heights (620px each) leave some unused vertical space in 2700px target
   - Could use ColumnDataSource for more idiomatic Bokeh code
+  image_description: |-
+    The plot displays four vertically stacked panels showing a time series decomposition of airline passenger data from 2018-2028. The main title "timeseries-decomposition · bokeh · pyplots.ai" appears at the top. All panels use the Python blue color (#306998) for the line plots with dashed grid lines at 0.3 alpha.
+
+    - **Original Series** (top): Shows the raw data with clear upward trend and seasonal fluctuations, ranging from ~80 to ~280 thousand passengers. Y-axis labeled "Passengers (thousands)".
+    - **Trend Component**: Displays a smooth monotonically increasing line from ~110 to ~240, capturing the long-term growth.
+    - **Seasonal Component**: Shows a repeating sinusoidal pattern oscillating between approximately -30 and +30, with consistent annual cycles.
+    - **Residual Component** (bottom): Shows random noise fluctuating around zero (approximately -20 to +20), with the x-axis labeled "Date".
+
+    All panels share a linked x-axis from 2018 to 2028. The Bokeh toolbar is visible on the right side of each panel with interactive tools.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, subplot titles at 26pt, axis labels at 22pt, tick
+          labels at 18pt - all clearly readable, though y-axis labels on left side
+          are slightly rotated and could be positioned better
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 4 is perfect for the 4800px width, data density well
+          handled
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single Python blue color is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good panel distribution but some wasted space on right with Bokeh
+          toolbar; panels could use slightly more vertical space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Passengers (thousands)" includes units, "Date", "Trend", "Seasonal",
+          "Residual" are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid at 0.3 alpha with dashed style is subtle and appropriate; no
+          legend needed for this plot type but the Bokeh toolbar is visible and somewhat
+          distracting
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with 4 stacked subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on x-axis, values on y-axis, correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All 4 components present: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Each subplot clearly labeled with component name
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "timeseries-decomposition · bokeh · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all decomposition components well; trend/seasonal/residual
+          separation is clear, though residuals could show more interesting patterns
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Airline passenger data is a classic, real-world time series example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for airline passengers in thousands; 120 months
+          (10 years) provides good coverage, though the trend gaps at start/end from
+          decomposition windowing are visible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple linear script with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as "plot.png" which is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses figure, line, column layout, x_range linking between panels,
+          Title model, export_png - good Bokeh usage but could leverage ColumnDataSource
+          and HoverTool for enhanced interactivity
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/highcharts.yaml b/plots/timeseries-decomposition/metadata/highcharts.yaml
index 87dbe1f4d0..80b227fb0f 100644
--- a/plots/timeseries-decomposition/metadata/highcharts.yaml
+++ b/plots/timeseries-decomposition/metadata/highcharts.yaml
@@ -28,3 +28,174 @@ review:
   - Individual subplot heights appear somewhat compressed - could benefit from slightly
     taller subplots
   - Legend text could be slightly larger for better readability at full resolution
+  image_description: The plot displays a time series decomposition with four vertically
+    stacked subplots, each sharing a common time axis from 2018-01 to 2028-01. The
+    first subplot shows the **Original Series** in blue (#306998), displaying airline
+    passenger data with both upward trend and seasonal variation, ranging from ~50
+    to ~400 thousand. The second subplot shows the **Trend Component** in yellow/gold
+    (#FFD43B), displaying a smooth upward trend from ~100 to ~300. The third subplot
+    shows the **Seasonal Component** in purple (#9467BD), with regular annual cycles
+    oscillating between approximately -40 and +40. The fourth subplot shows the **Residual
+    Component** in teal (#17BECF), with random noise fluctuating around zero between
+    approximately -40 and +40. Each subplot has clear titles, y-axis labels with units,
+    legends on the right side, and light grid lines. The main title "timeseries-decomposition
+    · highcharts · pyplots.ai" appears at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and subplot labels readable, axis labels clear but font sizes
+          could be slightly larger for tick marks
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Line widths are appropriate, lines clearly visible, though line width
+          could be slightly thicker
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, teal)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good vertical stacking, but individual subplots appear somewhat compressed
+          vertically
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes have descriptive labels with units (e.g., "Passengers (thousands)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legends positioned well but could be more prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with 4 vertically stacked subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values on y-axis, correctly decomposed
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four components displayed: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match component names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "timeseries-decomposition · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all decomposition components, trend/seasonal/residual patterns
+          visible
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly airline passengers - classic, neutral time series example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values realistic for passenger data in thousands, 10-year span appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has a helper function which slightly violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts datetime axis formatting, multiple chart instances,
+          and selenium for PNG export. Could leverage more interactive features.
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/letsplot.yaml b/plots/timeseries-decomposition/metadata/letsplot.yaml
index 855a106dfa..b97af20edd 100644
--- a/plots/timeseries-decomposition/metadata/letsplot.yaml
+++ b/plots/timeseries-decomposition/metadata/letsplot.yaml
@@ -24,3 +24,179 @@ review:
     readability
   - File handling requires moving files from lets-plot-images subdirectory (workaround
     needed)
+  image_description: The plot displays a time series decomposition with four vertically
+    stacked panels showing monthly temperature data over 5 years (2019-2024). The
+    **Original Series** (blue line) shows temperature fluctuations ranging from ~7°C
+    to ~30°C with clear seasonal patterns. The **Trend** component (red line) shows
+    a gradual warming from ~15°C to ~18°C. The **Seasonal** component (green line)
+    displays a repeating annual cycle oscillating between approximately -11°C and
+    +11°C. The **Residual** (purple line) shows random noise centered around 0, ranging
+    from about -2°C to +3°C. All panels share a common x-axis showing dates (Jul,
+    Oct, Jan, Apr pattern), with the main title "timeseries-decomposition · letsplot
+    · pyplots.ai" at the top. Each panel has "Temperature (°C)" on the y-axis and
+    clear component labels. Light grid lines aid readability.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title ~24pt, subplot titles ~20pt, axis labels
+          and ticks clearly visible, though axis tick text could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Lines are clearly visible with good thickness (size=1.2), though
+          could be slightly thicker for the scaled output
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Excellent colorblind-safe palette: blue (#306998), red (#DC2626),
+          green (#059669), purple (#7C3AED) - all distinguishable'
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas; four panels fill the space well with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All y-axes labeled "Temperature (°C)" with units; x-axis labeled
+          "Date"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is present but very subtle (almost invisible); no legend needed
+          for this plot type
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with all four components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values on y-axis for all panels
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four components present: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (components labeled via subplot titles)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeseries-decomposition · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear trend, seasonal pattern, and residual noise; seasonal
+          cycle is well-defined; could show more residual variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Temperature data over 5 years is a classic, neutral, and comprehensible
+          scenario for time series decomposition
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Temperature values are realistic (7-30°C annual range), though the
+          3°C warming trend over 5 years is slightly aggressive
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern; uses gggrid for composition
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to plot.png but requires file moving from lets-plot-images
+          subdirectory
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses gggrid for multi-panel layout and theme_minimal(); could leverage
+          more lets-plot specific features like tooltips or interactive elements
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/matplotlib.yaml b/plots/timeseries-decomposition/metadata/matplotlib.yaml
index fb51fbdcd5..46661c08aa 100644
--- a/plots/timeseries-decomposition/metadata/matplotlib.yaml
+++ b/plots/timeseries-decomposition/metadata/matplotlib.yaml
@@ -26,3 +26,177 @@ review:
   - Tick labels at 14pt are slightly below the 16pt guideline
   - Could leverage more matplotlib features like annotations or fill_between for visual
     enhancement
+  image_description: 'The plot displays a time series decomposition with four vertically
+    stacked subplots sharing a common time axis (2018-2024). The **Original** series
+    (blue, #306998) shows monthly retail sales data ranging from ~90 to ~200 with
+    clear seasonal patterns and an upward trend. The **Trend** component (yellow/gold,
+    #FFD43B) shows a smooth upward trajectory from ~110 to ~165. The **Seasonal**
+    component (blue, #4B8BBE) displays repeating annual cycles oscillating between
+    approximately -30 and +25, with visible December holiday bumps. The **Residual**
+    component (yellow/gold, #FFE873) shows random noise fluctuating around a zero
+    reference line (gray), ranging from approximately -15 to +15. All subplots have
+    dashed grid lines (alpha 0.3), clear y-axis labels (Original, Trend, Seasonal,
+    Residual), and the bottom subplot includes a "Date" x-axis label. The title "timeseries-decomposition
+    · matplotlib · pyplots.ai" is clearly displayed at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 24pt, y-labels at 18pt, x-label at 20pt, tick labels at
+          14pt - all readable but tick labels slightly under 16pt guideline
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths of 2.5 are appropriate for time series, clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-friendly; however using two similar
+          yellows for Trend and Residual could be slightly confusing
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Four subplots fill the 16:12 canvas well with good proportions
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis labels are descriptive component names but lack units (e.g.,
+          "Sales (USD)")
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3, dashed), but no legend present - acceptable
+          since each subplot is labeled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with four stacked subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values on y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four components present: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, no clipping
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed, subplots labeled via y-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeseries-decomposition · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows upward trend, annual seasonality, holiday bumps, and random
+          residuals; could show multiplicative patterns for more variety
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly retail sales over 6 years is a realistic, neutral business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Sales values (100-200) are plausible but generic; specifying units
+          would improve context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: '`np.random.seed(42)` ensures reproducible results'
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, statsmodels)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API with axes methods
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic matplotlib subplots; could leverage `GridSpec` for custom
+          height ratios, `fill_between` for confidence intervals, or annotations
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/plotly.yaml b/plots/timeseries-decomposition/metadata/plotly.yaml
index 9a9a77ed73..6956041386 100644
--- a/plots/timeseries-decomposition/metadata/plotly.yaml
+++ b/plots/timeseries-decomposition/metadata/plotly.yaml
@@ -23,3 +23,167 @@ review:
     lack units context
   - Trend subplot y-axis label just says Trend without indicating the unit is still
     passengers
+  image_description: The plot displays a time series decomposition with four vertically
+    stacked subplots sharing a common x-axis spanning from 2014 to 2023. The top panel
+    shows the **Original** series in Python blue (#306998) depicting airline passenger
+    data fluctuating between ~100-270 with clear upward trend and seasonal oscillations.
+    The second panel shows the **Trend** component in sea green (#2E8B57), displaying
+    a smooth upward curve from ~120 to ~250. The third panel shows the **Seasonal
+    Component** in orange (#E07020) with a regular sinusoidal pattern oscillating
+    between approximately -30 and +30, repeating annually. The bottom panel shows
+    the **Residual** in purple (#8B4789), displaying random noise fluctuating around
+    zero between -20 and +20. The title "timeseries-decomposition · plotly · pyplots.ai"
+    is centered at the top. Light grid lines aid readability across all panels. The
+    x-axis label "Date" appears only on the bottom subplot.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 20pt, tick fonts at 16pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths of 2-3px are appropriate for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, green, orange, purple) are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Four subplots evenly distributed, good use of canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: First subplot has "Passengers (thousands)" but other subplots lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid at alpha 0.2, no legend needed (subplot titles suffice)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with 4 subplots
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values on y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four components present: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Subplot titles correctly label each component
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: '"timeseries-decomposition · plotly · pyplots.ai" matches exactly'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear upward trend, regular 12-month seasonality, and random
+          residuals
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Airline passenger data is a classic time series example (neutral,
+          business context)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Passenger values 100-270 thousand are realistic for airline data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, plotly, statsmodels)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Plotly API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/plotnine.yaml b/plots/timeseries-decomposition/metadata/plotnine.yaml
index 95d68a23b8..39443d2f7b 100644
--- a/plots/timeseries-decomposition/metadata/plotnine.yaml
+++ b/plots/timeseries-decomposition/metadata/plotnine.yaml
@@ -27,3 +27,187 @@ review:
     values stacked
   - Panel spacing could be reduced to make better use of vertical canvas space
   - Grid alpha at 0.5 could be more subtle (0.3 would be better)
+  image_description: 'The plot displays a time series decomposition with four vertically
+    stacked faceted panels on a 16:9 landscape layout. The main title "timeseries-decomposition
+    · plotnine · pyplots.ai" appears at the top in bold black text. Each panel has
+    a gray strip header showing the component name with numbering: "1. Original",
+    "2. Trend", "3. Seasonal", and "4. Residual". All panels use a consistent blue
+    (#306998) line color. The x-axis shows dates from 2012 to 2024 with 2-year intervals,
+    and a shared "Date" label appears at the bottom. A shared "Value" label appears
+    on the left y-axis. The Original panel shows time series data ranging from ~200
+    to ~600 with clear upward trend and seasonal oscillation. The Trend panel shows
+    a smooth increasing line from ~260 to ~540. The Seasonal panel shows a repeating
+    sinusoidal pattern oscillating between approximately -25 and +25. The Residual
+    panel shows random noise fluctuating roughly between -30 and +30. Light gray grid
+    lines are visible in all panels. The background is minimal/white with subtle gray
+    panel backgrounds.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is clear at 24pt, axis labels at 20pt, strip text at 16pt bold.
+          Y-axis tick labels in the first panel are slightly compressed/overlapping
+          due to many values.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; strip headers, axis labels, and tick
+          marks are all clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line width of 1.2 is well-suited for the data density; all four time
+          series are clearly visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single blue color (#306998) with good contrast against white background;
+          no colorblind concerns.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space with four panels; however, the panels are
+          quite short vertically leaving significant white space between them.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Date" and "Value" are descriptive axis labels.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha 0.5, but no legend is needed for this single-series
+          plot. However, the grid could be slightly more subtle.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with four stacked subplots.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Date on X-axis, values on Y-axis with free_y scales for each component.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four components present: Original, Trend, Seasonal, Residual
+          with clear labels.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data is visible within axes ranges.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed, components labeled via facet strips).
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeseries-decomposition · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows clear trend, seasonal pattern, and residual noise; demonstrates
+          additive decomposition well. Could show slightly more variation in seasonal
+          amplitude.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Airline passenger data context is realistic and commonly used for
+          time series examples.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in reasonable range for passenger data (200-600); 12-year
+          span with monthly data provides good granularity.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → decomposition → plotting
+          → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic results.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used; statsmodels for decomposition is appropriate.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but uses both positional (dpi=300) and keyword
+          arguments; minor issue.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of ggplot grammar with facet_wrap, theme customization,
+          and scale_x_datetime. Could leverage more plotnine-specific features like
+          annotate or custom scales.
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/pygal.yaml b/plots/timeseries-decomposition/metadata/pygal.yaml
index 792a1e85f1..c547b6104c 100644
--- a/plots/timeseries-decomposition/metadata/pygal.yaml
+++ b/plots/timeseries-decomposition/metadata/pygal.yaml
@@ -27,3 +27,174 @@ review:
     PIL composition)
   - Grid lines could be more subtle (current alpha appears fully opaque)
   - Left margin y-axis label area could be slightly narrower for better canvas utilization
+  image_description: |-
+    The plot displays a time series decomposition with four vertically stacked subplots. The main title "timeseries-decomposition · pygal · pyplots.ai" appears at the top in dark text. Each subplot shows a different component:
+    1. **Original Series (CO2 ppm)** - Blue line showing the raw CO2 measurements ranging from ~406-436 ppm with visible upward trend and oscillations
+    2. **Trend Component** - Yellow/gold line showing the smooth upward trend from ~410 to ~430 ppm
+    3. **Seasonal Component** - Green line showing the repeating annual cycle oscillating between approximately -3 and +3 ppm
+    4. **Residual Component** - Red/coral line showing random noise fluctuating around zero between approximately -2 and +2 ppm
+
+    Each subplot has its own y-axis label on the left side (CO₂ (ppm), Trend (ppm), Seasonal (ppm), Residual (ppm)). The bottom subplot includes x-axis date labels showing dates from 2020-01 to 2025-07 at 6-month intervals with 35-degree rotation. Light gray grid lines are present on all charts. The colors are distinct and colorblind-friendly. The layout effectively uses the canvas with balanced spacing.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; main title, subplot titles, and axis labels
+          are clearly visible. Tick labels could be slightly larger.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere; x-axis labels properly rotated and
+          spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths are appropriate; data clearly visible across all 4 panels
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, and red/coral are colorblind-safe and easily
+          distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good vertical stacking; slight inefficiency in left margin usage
+          for y-axis labels
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All axes labeled with units (ppm for y-axes, Date for x-axis)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid present but no legend (though not strictly needed since each
+          subplot is self-labeled)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: four vertically stacked line charts for decomposition components'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, values on y-axis for each component
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All 4 components shown: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible with appropriate y-ranges per component
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Subplot titles clearly identify each component
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "timeseries-decomposition · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear upward trend, regular seasonal pattern, and residual
+          noise
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: CO2 measurements is a real, neutral, scientific scenario; values
+          are realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: CO2 values (~410-430 ppm) match real-world Mauna Loa-style data
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses functions/classes (PIL Image manipulation) beyond simple script
+          structure, though necessary for pygal
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html (correct, but complexity added)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal.Line with custom Style, SVG rendering, HTML output. Good
+          use of pygal's SVG nature, though complex image composition workaround needed.
+  verdict: APPROVED
diff --git a/plots/timeseries-decomposition/metadata/seaborn.yaml b/plots/timeseries-decomposition/metadata/seaborn.yaml
index 15b2be1157..fdef1e5a7f 100644
--- a/plots/timeseries-decomposition/metadata/seaborn.yaml
+++ b/plots/timeseries-decomposition/metadata/seaborn.yaml
@@ -26,3 +26,170 @@ review:
     clearer descriptions
   - Uses basic sns.lineplot rather than leveraging more distinctive seaborn features
     like FacetGrid or relplot
+  image_description: The plot displays a time series decomposition with four vertically
+    stacked subplots sharing a common x-axis (Date, 2014-2024). The top subplot "Original"
+    shows the raw airline passenger data (Passengers in thousands, 350-800 range)
+    as a blue line with clear seasonal patterns and upward trend. The second subplot
+    "Trend" displays a smooth orange/red line showing gradual growth from ~400 to
+    ~700. The third subplot "Seasonal" shows a repeating yellow/gold sawtooth-like
+    pattern oscillating between approximately -100 and +100, with a dashed gray zero-line.
+    The fourth subplot "Residual" shows green noise fluctuating around zero (-50 to
+    +50), also with a dashed gray zero-line. The main title "timeseries-decomposition
+    · seaborn · pyplots.ai" appears at the top in bold. All subplots have white backgrounds
+    with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Text is readable; title is 24pt, labels ~18pt, ticks 14pt. Slightly
+          under optimal for tick size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Line widths appropriate, all components clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors (blue, orange, yellow, green) are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Four subplots well balanced, fills canvas appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: false
+        comment: Y-axis labels descriptive but only "Original" has units (thousands)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha=0.3, no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct time series decomposition with 4 components
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Time on x-axis, component values on y-axes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All four components present: Original, Trend, Seasonal, Residual'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, appropriate axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; subplot titles serve this purpose
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows clear trend growth, regular seasonal pattern, and random residuals
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Monthly airline passengers is a classic, neutral time series example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Passenger values (400-800 thousands) are realistic for monthly data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → decomposition → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.lineplot and sns.set_theme/set_context, but could leverage
+          more seaborn-specific features
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/altair.yaml b/plots/tree-phylogenetic/metadata/altair.yaml
index a09152fa45..cedec27405 100644
--- a/plots/tree-phylogenetic/metadata/altair.yaml
+++ b/plots/tree-phylogenetic/metadata/altair.yaml
@@ -23,3 +23,176 @@ review:
   - Code uses helper functions instead of preferred flat KISS structure
   - Missing clade color-coding which was mentioned in specification notes
   - Internal nodes could be slightly larger for better visibility
+  image_description: 'The plot displays a phylogenetic tree of primate species showing
+    evolutionary relationships based on mitochondrial DNA divergence. The tree uses
+    a rectangular (cladogram) layout with Python Blue (#306998) branches and small
+    blue internal nodes. Leaf nodes are marked with Python Yellow (#FFD43B) circles
+    with blue borders. Seven species are labeled: Olive Baboon, Rhesus Macaque, Gibbon,
+    Orangutan, Bonobo, Chimpanzee, and Human. The x-axis shows "Evolutionary Distance
+    (substitutions per site)" ranging from -0.02 to 0.46. A scale bar labeled "0.05
+    subs/site" appears in the lower left. The title reads "Primate Evolution · tree-phylogenetic
+    · altair · pyplots.ai" with a subtitle "Phylogenetic tree based on mitochondrial
+    DNA divergence". The y-axis has no labels or ticks (appropriately hidden for a
+    tree diagram).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, labels at 20pt bold, all text clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, species labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Branches at strokeWidth=4, nodes clearly visible, slight deduction
+          for internal nodes being small
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe, no red-green issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight empty space on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid visible but very subtle (good), no legend needed for this plot
+          type
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phylogenetic tree with rectangular/cladogram layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths proportional to evolutionary distance
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has scale bar, branch lengths, species labels; missing color-coding
+          of clades
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed, tooltips available)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: tree-phylogenetic · altair · pyplots.ai'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical branching, varying branch lengths, multiple clades;
+          could show more divergence in some branches
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real primate phylogeny based on mitochondrial DNA, scientifically
+          accurate relationships
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths in substitutions per site are realistic for mtDNA
+          divergence
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Uses helper functions (get_leaves, calc_x_positions, calc_y_positions)
+          instead of flat structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of layered composition, mark_rule for branches, mark_circle
+          for nodes, mark_text for labels, tooltips for interactivity, proper configuration
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/bokeh.yaml b/plots/tree-phylogenetic/metadata/bokeh.yaml
index e6374f207a..541bf94ee1 100644
--- a/plots/tree-phylogenetic/metadata/bokeh.yaml
+++ b/plots/tree-phylogenetic/metadata/bokeh.yaml
@@ -24,3 +24,180 @@ review:
     visual association
   - Title format includes descriptive prefix instead of strictly following spec-id
     format
+  image_description: The plot displays a rectangular phylogenetic tree (cladogram)
+    showing primate evolutionary relationships. The tree has a clean white background
+    with blue (#306998) branch lines connecting nodes. Five leaf nodes are shown as
+    yellow circles with blue outlines (Human, Chimpanzee, Gorilla, Orangutan, Gibbon),
+    and four internal/ancestral nodes are shown as smaller solid blue circles. Species
+    labels appear to the right of each leaf node in dark gray text. Clade annotations
+    ("Hominini", "Homininae", "Hominidae") appear in italics near their respective
+    ancestral nodes. A scale bar at the bottom left indicates "0.1 substitutions/site".
+    The x-axis is labeled "Evolutionary Distance (substitutions per site)" with tick
+    marks from 0 to 1. A legend in the top-right distinguishes "Extant Species (Leaf
+    Nodes)" from "Ancestral Nodes (Internal)". The title "Primate Evolution · tree-phylogenetic
+    · bokeh · pyplots.ai" appears at the top-left.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, species labels at 20pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Nodes and branches clearly visible, good sizing for the data density
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Tree is reasonably centered but could use slightly more of the canvas
+          width
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Evolutionary Distance (substitutions
+          per site)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid appropriately hidden for tree diagram, but legend is positioned
+          far in the corner
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phylogenetic tree (rectangular cladogram) visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X represents evolutionary distance, Y positions species correctly
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has branch lengths, scale bar, clade labels; missing color-coded
+          clades as suggested in spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies leaf vs internal nodes
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title is "Primate Evolution · tree-phylogenetic · bokeh · pyplots.ai"
+          - includes context but format differs slightly from "{spec-id} · {library}
+          · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure, branch lengths, multiple clades; could
+          show more variation in branch lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate phylogeny based on mitochondrial DNA is a well-established,
+          scientifically accurate scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths in substitutions per site are realistic evolutionary
+          distances
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses strict=True in zip which is fine, but adds unnecessary complexity
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, HoverTool for interactivity, Label annotations,
+          Legend with custom styling - good use of Bokeh features
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/highcharts.yaml b/plots/tree-phylogenetic/metadata/highcharts.yaml
index 571d37f034..9acadfd3ec 100644
--- a/plots/tree-phylogenetic/metadata/highcharts.yaml
+++ b/plots/tree-phylogenetic/metadata/highcharts.yaml
@@ -24,3 +24,176 @@ review:
   - Missing legend to explain the meaning of different blue shades (taxonomic ranks)
   - Code uses helper function (assign_y_positions) instead of pure KISS flat structure
   - Grid lines for Y-axis are hidden but could show subtle reference lines
+  image_description: 'The plot displays a phylogenetic tree diagram showing primate
+    evolutionary relationships. The layout is rectangular/cladogram style with the
+    root on the right side and species leaves on the left. The x-axis shows "Divergence
+    Time (Million Years Ago)" ranging from 80 to 0 MYA (reversed). Nine primate species
+    are labeled with yellow circular markers: Nasalis larvatus, Colobus guereza, Papio
+    anubis, Macaca mulatta, Symphalangus syndactylus, Hylobates lar, Pongo pygmaeus,
+    Pan troglodytes, and Homo sapiens. Branch lines are colored in shades of blue
+    (varying by taxonomic rank) with yellow for species nodes. A scale bar labeled
+    "Scale: 25 MYA" is positioned at the bottom center. The title reads "Primate Phylogeny
+    · tree-phylogenetic · highcharts · pyplots.ai" with a subtitle explaining the
+    data source. The background is white with subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, subtitle, axis labels, and species names are all clearly readable;
+          tick labels slightly small
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; species labels well-spaced on left side
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Node markers visible; branch lines could be slightly thicker for
+          internal nodes
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-yellow palette is colorblind-safe; good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas; tree fills plot area well with appropriate margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Divergence Time (Million
+          Years Ago)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend for rank colors (blue shades represent different taxonomic
+          ranks but this is not explained)
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phylogenetic tree diagram with rectangular layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths proportional to evolutionary distance (depth values)
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has scale bar, branch lengths, species labels; missing clade color
+          legend
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full evolutionary timeline shown (0-80 MYA)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present for rank colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Primate Phylogeny · tree-phylogenetic · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure with multiple clades; could show more
+          divergence variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Real primate species with biologically accurate evolutionary relationships
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Divergence times are realistic for primate evolution (25-75 MYA)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: false
+        comment: Uses helper functions (assign_y_positions) instead of flat structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Selenium and Highcharts usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses Highcharts annotations for scale bar, custom series coloring,
+          scatter markers with dataLabels; good but not exceptional use
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/letsplot.yaml b/plots/tree-phylogenetic/metadata/letsplot.yaml
index 46bcae4fef..fc0436a33f 100644
--- a/plots/tree-phylogenetic/metadata/letsplot.yaml
+++ b/plots/tree-phylogenetic/metadata/letsplot.yaml
@@ -24,3 +24,179 @@ review:
   weaknesses:
   - Missing legend to explain the three clade color groups (blue/yellow/green)
   - No scale bar to indicate branch length units
+  image_description: 'The plot displays a phylogenetic tree (rectangular cladogram
+    layout) showing primate evolutionary relationships. The tree has a blue color
+    scheme (#306998) for branches and nodes, with leaf nodes color-coded by clade:
+    blue for great apes (Human, Chimpanzee, Gorilla), yellow for lesser apes (Orangutan,
+    Gibbon), and green for Old World monkeys (Macaque, Baboon, Mandrill). The x-axis
+    shows "Evolutionary Distance (substitutions per site)" ranging from 0 to 0.85.
+    Species labels appear to the right of each leaf node. The title reads "Primate
+    Evolution · tree-phylogenetic · letsplot · pyplots.ai". Layout is horizontal with
+    good use of canvas space.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large, axis labels are clear, species names are
+          readable at 14pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, species labels are well-spaced vertically
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Branch lines (size 1.5) and nodes (size 4-6) are clearly visible,
+          though branch width could be slightly thicker
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green clade colors are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good horizontal layout filling canvas, but some empty space on right
+          side beyond species labels
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Evolutionary Distance (substitutions
+          per site)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend explaining clade colors; subtle grid only on x-axis which
+          is appropriate
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rectangular phylogenetic tree (cladogram) visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths correctly proportional to evolutionary distance on
+          x-axis
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has branch lengths, species labels, clade colors, but missing scale
+          bar indicator
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full evolutionary distance range (0 to 0.85)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present to explain clade color coding
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correctly formatted: "Primate Evolution · tree-phylogenetic · letsplot
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 8 primate species with varied branch lengths, demonstrates
+          evolutionary divergence, but all leaf nodes end at similar x-positions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate phylogeny based on mitochondrial DNA is a real, scientifically
+          relevant scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Branch lengths in substitutions per site are realistic, though some
+          species (Mandrill, Baboon) should have longer terminal branches
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Contains helper functions (parse_newick, calc_x_positions, calc_y_positions)
+          which adds complexity, but necessary for Newick parsing
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic Newick string input, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only uses re, pandas, and lets_plot - all necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of lets-plot's ggplot2 grammar with geom_segment, geom_point,
+          geom_text, scale_color_identity, theme customization, and ggsize/ggsave
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/matplotlib.yaml b/plots/tree-phylogenetic/metadata/matplotlib.yaml
index 0e852198b9..5d5708064d 100644
--- a/plots/tree-phylogenetic/metadata/matplotlib.yaml
+++ b/plots/tree-phylogenetic/metadata/matplotlib.yaml
@@ -23,3 +23,180 @@ review:
   weaknesses:
   - Missing legend to explain the meaning of the three clade colors (blue=apes, yellow=monkeys,
     green=prosimians)
+  image_description: 'The plot displays a phylogenetic tree showing primate evolutionary
+    relationships. The tree structure uses a rectangular cladogram layout with horizontal
+    and vertical branches. Three distinct clade colors are used: blue (#306998) for
+    apes, yellow (#FFD43B) for monkeys (Old World and New World), and green (#5A9C5A)
+    for prosimians (Lemur, Tarsier, Galago). Species names are labeled at leaf nodes
+    with circular markers. Human and Chimpanzee are at the top as closest relatives,
+    followed by Gorilla, then Asian apes (Orangutan, Gibbon), then Old World monkeys
+    (Rhesus Macaque, Baboon), New World monkeys (Capuchin, Spider Monkey), and prosimians
+    (Lemur, Tarsier, Galago) at the bottom. The x-axis shows "Evolutionary Distance
+    (substitutions per site)" ranging from 0.00 to 0.35. A scale bar labeled "0.05"
+    appears at the bottom left. The title reads "Primate Evolution · tree-phylogenetic
+    · matplotlib · pyplots.ai". The plot has a clean white background with subtle
+    vertical grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, tick labels 16pt, species names 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, species names well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Branch linewidth 2.5 appropriate, markers size 8 visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green are colorblind-distinguishable, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of canvas but slight asymmetry with prosimians having more
+          horizontal extent than apes
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis label includes units "(substitutions per site)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3) which is good, but there is no legend
+          explaining the clade color coding
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phylogenetic tree diagram with rectangular layout
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths proportional to evolutionary distance as specified
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has scale bar, clade coloring, species labels, branch lengths; missing
+          legend for clade colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: X-axis shows full range from 0 to 0.38 covering all data
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend present for clade colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "{topic} · {spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple clades (apes, monkeys, prosimians), varying branch
+          lengths, hierarchical relationships
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate evolution is a classic, scientifically valid phylogenetic
+          example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths (0.13-0.24 substitutions/site) are realistic for primate
+          mtDNA
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic pre-computed coordinates, no random data
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png (correct path but missing dpi verification in header
+          comment)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses matplotlib plotting capabilities
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ax.plot for branches and ax.text for labels; standard matplotlib
+          usage but no specialized features like patches, collections, or custom line
+          styles
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/plotly.yaml b/plots/tree-phylogenetic/metadata/plotly.yaml
index 201f8801fe..1f77ced10f 100644
--- a/plots/tree-phylogenetic/metadata/plotly.yaml
+++ b/plots/tree-phylogenetic/metadata/plotly.yaml
@@ -23,3 +23,178 @@ review:
   weaknesses:
   - Grid lines extend into the tree area which could be cleaner
   - No legend present, though this is standard for phylogenetic trees
+  image_description: 'The plot displays a rectangular phylogenetic tree (cladogram)
+    showing primate evolutionary relationships. The tree has a dark blue (#306998)
+    branch structure with yellow (#FFD43B) circular markers at leaf nodes representing
+    five species: Human, Chimpanzee, Gorilla, Orangutan, and Gibbon. Species names
+    appear in dark gray text to the right of each leaf node. The tree topology shows
+    Human and Chimpanzee as the closest relatives, then Gorilla, then Orangutan, with
+    Gibbon as the outgroup. Three clade annotations in italic gray text with arrows
+    identify "Hominini" (Human-Chimp), "Homininae (African Apes)" (Human-Chimp-Gorilla),
+    and "Hominidae (Great Apes)" (all except Gibbon). A scale bar at the bottom left
+    shows "0.1 substitutions/site". The x-axis is labeled "Evolutionary Distance (substitutions
+    per site)" ranging from 0 to 0.5. The title reads "Primate Evolution · tree-phylogenetic
+    · plotly · pyplots.ai". Background is white with subtle gray grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, species names at 20pt, all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, species labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Markers (size 18) and branch lines (width 3) appropriately sized
+          for 5 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but tree is slightly left-heavy with species names
+          clustered on right
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "(substitutions per site)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle, but no legend (though not needed for this plot type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct rectangular phylogenetic tree visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly represents evolutionary distance, tree topology
+          accurate
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths proportional, scale bar present, clade annotations
+          included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately (0 to 0.55 range covers all species)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for tree diagrams, clade labels are accurate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Primate Evolution · tree-phylogenetic · plotly
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure, branch lengths, clade groupings; could
+          show more diversity in branch lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate mitochondrial DNA phylogeny is a classic, scientifically
+          accurate example
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths (0.1-0.4 substitutions/site) are realistic for primate
+          divergence
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: Deterministic data (no random elements), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Hover tooltips, annotations with arrows, HTML export for interactivity
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/plotnine.yaml b/plots/tree-phylogenetic/metadata/plotnine.yaml
index c6a31538bf..904f7fa669 100644
--- a/plots/tree-phylogenetic/metadata/plotnine.yaml
+++ b/plots/tree-phylogenetic/metadata/plotnine.yaml
@@ -25,3 +25,171 @@ review:
     be just "tree-phylogenetic · plotnine · pyplots.ai"
   - Some clade colors (Homininae, Gorillini, Hominini) are identical yellow, making
     it hard to distinguish these clades in the legend
+  image_description: 'The plot displays a phylogenetic tree diagram showing primate
+    evolutionary relationships. The tree uses a rectangular/cladogram layout with
+    horizontal and vertical line segments. Eight species are shown as leaf nodes:
+    Lemur (top), Baboon, Macaque, Gibbon, Orangutan, Gorilla, Chimpanzee, and Human
+    (bottom). Branch colors indicate different clades: brown for Strepsirrhini (Lemur),
+    blue for Haplorrhini/Catarrhini/Hominoidea, red/crimson for Cercopithecidae (Old
+    World monkeys), green for Hylobatidae (Gibbon), orange for Ponginae (Orangutan),
+    and yellow for Homininae/Gorillini/Hominini (African apes + humans). Blue dots
+    mark leaf nodes, with species names in dark gray text to the right. A scale bar
+    at the bottom left shows "0.1 substitutions/site". A legend on the right lists
+    all 11 clades with their colors. The title "Primate Phylogeny · tree-phylogenetic
+    · plotnine · pyplots.ai" is bold and centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, species labels are readable at size 14, scale
+          bar text is clear. Slightly smaller than optimal for species names.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text anywhere, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Tree branches are well-sized (2.5), leaf points visible (size 5).
+          Good overall but could be slightly larger.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color differentiation between clades, though some yellows (Homininae,
+          Gorillini, Hominini) are very similar
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, tree is well-proportioned with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Scale bar provides units but no axis labels (appropriate for tree
+          diagram using theme_void)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid (appropriate for tree), legend well-placed with white background
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phylogenetic tree visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths proportional to evolutionary distance as specified
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has scale bar, clade coloring, rectangular layout. Missing: circular/radial
+          layout option'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All species visible, full tree displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps clade names to colors
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title is "Primate Phylogeny · tree-phylogenetic · plotnine · pyplots.ai"
+          - includes spec-id and library but has extra descriptive text
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical relationships, multiple clades, varying branch
+          lengths. Good diversity of primates from lemurs to humans.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate phylogeny based on mitochondrial DNA is a real, scientifically
+          accurate scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Branch lengths are reasonable evolutionary distances (0.1-0.65 substitutions/site)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions/classes, follows imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 4
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/pygal.yaml b/plots/tree-phylogenetic/metadata/pygal.yaml
index d4e18a43b4..05d3448df2 100644
--- a/plots/tree-phylogenetic/metadata/pygal.yaml
+++ b/plots/tree-phylogenetic/metadata/pygal.yaml
@@ -22,3 +22,188 @@ review:
   - Code uses a helper function which deviates from KISS principle
   - Tree is somewhat compressed with unused whitespace on the right side of the canvas
   - Y-axis grid lines are visible but serve no purpose for tree visualization
+  image_description: 'The plot displays a phylogenetic tree diagram showing primate
+    evolutionary relationships. The tree has a white background with blue (pyplots
+    blue #306998) branch lines connecting 6 species: Human, Chimpanzee, Gorilla, Orangutan,
+    Gibbon, and Macaque. Each species is represented by a colored dot marker at the
+    leaf node with matching colored bold text labels positioned to the right of each
+    marker. The colors used are red (Human), blue (Chimpanzee), teal (Gorilla), gold
+    (Orangutan), orange (Gibbon), and brown (Macaque) - a colorblind-friendly palette.
+    The x-axis shows "Evolutionary Distance (substitutions per site)" from 0 to 1,
+    with a scale bar in the bottom-left indicating "0.1 substitutions/site". The title
+    reads "Primate Evolution · tree-phylogenetic · pygal · pyplots.ai" at the top.
+    The tree correctly shows evolutionary relationships with Human-Chimpanzee as the
+    closest pair, then Gorilla, then Orangutan, with Gibbon-Macaque as an outgroup.'
+  criteria_checklist:
+    visual_quality:
+      score: 34
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis label, species labels, and scale bar text are all clearly
+          readable at the canvas size. Font sizes are well-scaled for 4800x2700.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; species labels are well-positioned
+          next to their markers.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Branch lines are clearly visible with appropriate stroke width.
+          Markers are well-sized (dots_size=28). Minor: some leaf nodes extend quite
+          far right leaving unused space.'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-friendly palette with distinct hues for each
+          species.
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good vertical spread but horizontal layout could be better optimized;
+          significant whitespace on the right side of the plot.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis has descriptive label with units "Evolutionary Distance (substitutions
+          per site)".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No legend needed (labels inline), but vertical guide lines are visible
+          and somewhat distracting from the tree structure.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a phylogenetic tree (rectangular cladogram style).
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X-axis correctly represents evolutionary distance; Y positions correctly
+          separate species.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has branch lengths proportional to evolutionary distance, species
+          labels, scale bar. Missing: clade coloring mentioned in spec notes.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: No formal legend, but inline labels are accurate. Could benefit from
+          clade grouping indication.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Primate Evolution · tree-phylogenetic · pygal ·
+          pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical relationships, branch lengths, multiple clades.
+          Could show more variation in branch lengths to highlight divergence differences.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate mitochondrial DNA evolution is a real, well-documented scenario.
+          Species relationships are scientifically accurate.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Substitution rates are plausible. Some branch length values could
+          be more varied to better demonstrate the proportionality feature.
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Generally follows KISS but has significant complexity with SVG manipulation
+          and coordinate conversion functions.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random seed needed as all values are hardcoded.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only necessary imports: pygal, cairosvg, Style.'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses manual SVG manipulation which is fragile; coordinate conversion
+          relies on hardcoded plot bounds.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Creates plot.png but also creates plot.svg and plot.html which are
+          not required.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart with custom styling, but had to resort to SVG
+          manipulation for labels and scale bar rather than using native pygal features.
+  verdict: APPROVED
diff --git a/plots/tree-phylogenetic/metadata/seaborn.yaml b/plots/tree-phylogenetic/metadata/seaborn.yaml
index fe52ce79dd..0aad6b93ae 100644
--- a/plots/tree-phylogenetic/metadata/seaborn.yaml
+++ b/plots/tree-phylogenetic/metadata/seaborn.yaml
@@ -24,3 +24,179 @@ review:
     logic preferred
   - Title format includes Primate Evolution prefix before spec-id which deviates from
     standard format
+  image_description: 'The plot displays a horizontal phylogenetic tree (dendrogram)
+    showing primate evolutionary relationships. The tree branches from left to right,
+    with 10 species labels on the right edge (Tarsier, Lemur, Chimpanzee, Human, Gorilla,
+    Orangutan, Gibbon, Macaque, Baboon, Marmoset). Branch colors encode clades: red
+    for Prosimians (Tarsier, Lemur), dark blue for Great Apes (Human, Chimpanzee,
+    Gorilla), light blue for Lesser Apes (Orangutan, Gibbon), yellow/gold for Old
+    World Monkeys (Baboon, Macaque), and green for New World Monkeys (Marmoset). The
+    x-axis shows "Evolutionary Distance (Million Years)" from 50 to 0. A legend in
+    the lower-left identifies the 5 clades. The title reads "Primate Evolution · tree-phylogenetic
+    · seaborn · pyplots.ai". Branch lines are thick (3px) and species labels are colored
+    to match their clade.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis label 20pt, tick labels 16-18pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all species labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Branch lines thick (3px), clearly visible against white background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses distinct colors (blue, light blue, yellow, green, red) that
+          are distinguishable even for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, tree fills space well, minor issue with legend
+          positioning in lower-left away from some data
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Evolutionary Distance (Million Years)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is appropriately subtle (alpha=0.3), but legend is positioned
+          in lower-left which slightly crowds the tree root area
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct phylogenetic tree visualization using dendrogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Branch lengths proportional to evolutionary distance
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has clade coloring, branch lengths, legend. Scale annotation present
+          but could be more prominent (e.g., actual scale bar rather than text annotation)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 10 species visible with full evolutionary timeline
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 5 clades
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses format "{spec-id} · {library} · pyplots.ai" but prepends "Primate
+          Evolution ·" which deviates slightly from spec
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple clades, varying branch lengths, different evolutionary
+          distances
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Primate evolution based on mitochondrial DNA is a classic, neutral,
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Evolutionary distances are biologically plausible (6-58 MYA range),
+          though some values could be more refined
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: false
+        comment: Contains a helper function `get_link_color()` which violates KISS
+          principle of no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn/matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses `sns.set_theme()` and `sns.despine()` for styling, but the core
+          dendrogram is from scipy. Seaborn enhances aesthetics but doesn't provide
+          native tree/dendrogram functions.
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/altair.yaml b/plots/treemap-basic/metadata/altair.yaml
index d1b4e429e9..f3bb033017 100644
--- a/plots/treemap-basic/metadata/altair.yaml
+++ b/plots/treemap-basic/metadata/altair.yaml
@@ -26,3 +26,176 @@ review:
   - Legend font sizes could be slightly larger for better readability at full resolution
   - No visual indication of hierarchy depth through color shading/intensity as mentioned
     in spec notes
+  image_description: 'The treemap displays market capitalization data by sector and
+    company using a strip-based layout. Technology sector (Python blue #306998) dominates
+    the left side with Apple ($2800B), Microsoft ($2400B), Google ($1800B), and NVIDIA
+    ($1200B) stacked vertically. Consumer (sage green), Healthcare (teal), Finance
+    (yellow), and Energy (coral) sectors follow to the right with decreasing widths
+    proportional to their total values. Each rectangle shows the company name in bold
+    white text with the market cap value below. White borders (3px) separate rectangles
+    clearly. A "Sector" legend appears on the right with proper color coding. The
+    title "treemap-basic · altair · pyplots.ai" is centered at the top.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Text is clearly readable; title 28pt, labels 20pt bold, values 16pt.
+          Slightly below optimal for largest canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; labels only shown on rectangles with sufficient
+          area.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangle sizes perfectly proportional to values; smart filtering
+          of labels for small areas.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors with good contrast; colorblind-safe palette
+          (no red-green confusion).
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization; treemap fills nearly the entire plot
+          area with balanced margins.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well-placed on right; no grid needed for treemaps. Legend
+          could be slightly larger.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization with nested rectangles proportional
+          to values.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category → color/strip, subcategory → nested rectangles, value →
+          area.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has distinct colors per category, labels on larger rectangles, white
+          borders. Missing: color shading intensity for hierarchy depth.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 15 data points visible; values correctly proportioned.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 sectors with matching colors.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "treemap-basic · altair · pyplots.ai".
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure with 5 categories and 15 companies;
+          demonstrates size variation well. Could show more extreme size differences.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market capitalization by sector/company is a perfect real-world treemap
+          use case; company values are realistic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Market cap values in billions USD are accurate for major companies
+          (Apple ~$2.8T, etc.).
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean sequential flow: imports → data → layout calculation → chart
+          creation → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values needed for this dataset).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of mark_rect, layered charts, tooltips. However, Altair
+          doesn't have native treemap support, so manual layout calculation was required.
+          The implementation is creative but doesn't leverage Altair's declarative
+          strengths as much as possible.
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/bokeh.yaml b/plots/treemap-basic/metadata/bokeh.yaml
index e3d1da9814..39b9fa2aae 100644
--- a/plots/treemap-basic/metadata/bokeh.yaml
+++ b/plots/treemap-basic/metadata/bokeh.yaml
@@ -27,3 +27,169 @@ review:
     nesting)
   - Code complexity from manual squarify implementation could be simplified if using
     squarify library
+  image_description: 'The treemap displays a budget allocation visualization with
+    12 nested rectangles representing different departments and projects. The largest
+    rectangles are Backend ($220K) in blue and Enterprise ($200K) in teal, positioned
+    in the bottom-left quadrant. Categories are color-coded: Engineering (Python blue
+    #306998), Marketing (yellow #FFD43B), Sales (teal #4ECDC4), HR (terra cotta #E07A5F),
+    and Finance (sage green #81B29A). Each rectangle shows the subcategory name and
+    dollar value (e.g., "Frontend $180K", "Digital $150K"). The legend is placed in
+    the upper-right corner with colored squares and category labels. The title "treemap-basic
+    · bokeh · pyplots.ai" appears centered at the top. White borders separate rectangles
+    clearly.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Labels readable at 24pt, title at 32pt; some smaller rectangles have
+          condensed labels
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; smart label sizing hides labels on tiny rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles well-sized with good proportions from squarify algorithm
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with distinct hues (blue, yellow, teal, terra
+          cotta, sage)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; treemap fills most of the area; legend
+          placement is functional but slightly overlaps content area
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend clearly shows category colors; no distracting grid
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization with nested rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to rectangle areas
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has distinct category colors, labels for larger rectangles, white
+          borders; hierarchy shown through color rather than nesting depth
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 data items visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "treemap-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety of sizes from $40K to $220K; demonstrates part-to-whole
+          relationships well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department/project is a realistic business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in $40K-$220K range are plausible for departmental budgets
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly follows imports→data→plot→save; squarify algorithm adds complexity
+          but is necessary since bokeh has no native treemap
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Implements custom squarify algorithm with ColumnDataSource, LabelSet
+          for annotations, rect glyphs, and generates both PNG and interactive HTML
+          output
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/highcharts.yaml b/plots/treemap-basic/metadata/highcharts.yaml
index dbc61068d0..64c610b94c 100644
--- a/plots/treemap-basic/metadata/highcharts.yaml
+++ b/plots/treemap-basic/metadata/highcharts.yaml
@@ -27,3 +27,180 @@ review:
   - Tooltip shows values but rectangles themselves do not display the dollar amounts
   - Could add subtle hover effects or animation to showcase Highcharts interactivity
     in HTML version
+  image_description: 'The treemap displays a budget allocation visualization with
+    5 main categories shown as colored rectangles. **Engineering** (blue, #306998)
+    occupies the largest area in the upper-left, containing subcategories: Backend
+    Dev, Frontend Dev, DevOps, and QA Testing. **Marketing** (yellow, #FFD43B) is
+    in the lower-left with Digital Ads, Content, Events, and Branding. **Sales** (cyan,
+    #17BECF) is in the upper-middle-right with Enterprise, SMB, and Partner. **Operations**
+    (purple, #9467BD) is in the upper-right with Infrastructure, Logistics, and Facilities.
+    **HR** (brown, #8C564B) is in the lower-right with Recruiting, Training, and Benefits.
+    The title "Budget Allocation · treemap-basic · highcharts · pyplots.ai" is displayed
+    at the top. Rectangle areas are proportional to budget values, with white borders
+    separating categories and subcategories.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and category labels are clear and readable; subcategory labels
+          could be slightly larger for some smaller rectangles
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All rectangles are clearly visible with appropriate sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, cyan, purple, brown)
+          - no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization, treemap fills the space well
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for treemaps (no axes), but no value labels shown on rectangles
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend disabled (appropriate for treemap), borders are subtle and
+          effective
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Hierarchical data correctly mapped with parent-child relationships
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: nested rectangles, area proportional
+          to value, distinct colors for categories, labels, subtle borders'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible in treemap
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Colors correctly identify categories (legend not needed as colors
+          are in rectangles)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Budget Allocation · treemap-basic · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchy with 5 main categories and 17 subcategories; good
+          variety in sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a real, comprehensible business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values ($10K-$45K) are realistic for project budgets, though total
+          range could be slightly wider
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → chart config → export'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also creates plot_raw.png intermediate (cleaned
+          up)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses squarified layout algorithm, drill-down capability enabled,
+          multi-level configuration; could showcase more Highcharts-specific features
+          like animation or custom tooltips
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/letsplot.yaml b/plots/treemap-basic/metadata/letsplot.yaml
index ec39ce688b..b306adac1a 100644
--- a/plots/treemap-basic/metadata/letsplot.yaml
+++ b/plots/treemap-basic/metadata/letsplot.yaml
@@ -24,3 +24,170 @@ review:
     categories
   - Legend positioned far right creates visual imbalance with treemap on left side
   - Could leverage more lets-plot specific features for enhanced visualization
+  image_description: The plot displays a treemap visualization showing corporate budget
+    allocation across 8 departments. The largest rectangle is Engineering (32%) in
+    Python blue (#306998), positioned on the left side occupying about a third of
+    the canvas. Marketing (22%) appears as a prominent yellow rectangle in the upper
+    right area. Sales (18%) is shown in green, Operations (12%) in coral/orange. Smaller
+    departments HR (7%), Finance (5%), R&D (3%), and Legal (1%) are arranged in progressively
+    smaller rectangles. White borders separate all rectangles clearly. Labels show
+    department names and percentages in bold white text for larger rectangles, while
+    smaller ones show only percentages. A legend titled "Department" is positioned
+    on the right side listing all 8 categories with their corresponding colors. The
+    title reads "Budget Allocation · treemap-basic · letsplot · pyplots.ai" at the
+    top center.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are readable, though some smaller rectangle labels
+          could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels are well-placed within rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangle sizes are well-proportioned and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with good contrast, colors are distinct
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, though legend area creates some imbalance
+          on the right
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed and clearly labeled
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization with nested rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Value correctly determines rectangle area
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for categories, labels for larger rectangles, borders
+          between rectangles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 8 categories visible and proportionally represented
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all departments with accurate colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Budget Allocation · treemap-basic · letsplot ·
+          pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchy well with varying sizes; missing subcategory hierarchy
+          level mentioned in spec
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Corporate budget allocation is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, values are plausible for corporate budgets
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Contains helper functions (squarify, make_label) which add complexity,
+          though necessary for treemap
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar appropriately with geom_rect and geom_text,
+          but doesn't leverage any unique lets-plot features beyond basic ggplot syntax
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/matplotlib.yaml b/plots/treemap-basic/metadata/matplotlib.yaml
index eab8543c20..e30d74b5a2 100644
--- a/plots/treemap-basic/metadata/matplotlib.yaml
+++ b/plots/treemap-basic/metadata/matplotlib.yaml
@@ -21,3 +21,168 @@ review:
   - Clean KISS structure with clear data-plot-save flow
   weaknesses:
   - Two HR rectangles (Recruiting and Training) are missing labels despite being visible
+  image_description: 'The treemap displays a budget allocation visualization with
+    12 nested rectangles representing different departments and their projects. The
+    largest rectangles are "Product Dev" ($45M, dark blue) and "Enterprise" ($35M,
+    light blue) on the left side. "Digital" ($30M, golden yellow) and "Infrastructure"
+    ($25M, dark blue) occupy significant space in the center-left. Smaller rectangles
+    for "QA", "Logistics", "Partners", "Support", "SMB", "Events", "Recruiting", and
+    "Training" fill the right side. Each rectangle shows the subcategory name and
+    budget value. The title "Budget Allocation by Department · treemap-basic · matplotlib
+    · pyplots.ai" appears at the top. A horizontal legend at the bottom identifies
+    the 5 department colors: Engineering (dark blue), Marketing (golden yellow), Sales
+    (light blue), Operations (gray), and HR (light yellow).'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is 24pt bold, labels are properly sized (12-18pt based on area),
+          all text clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, labels fit within their rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles are well-sized, proportional to values, alpha=0.85 provides
+          good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Five distinct colors with good contrast; blue/yellow palette is colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Treemap fills the canvas effectively with proper aspect ratio
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is well-placed below the chart, but no grid needed for treemap
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization with nested rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Area correctly proportional to budget values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors for categories, labels on larger rectangles, white
+          borders between rectangles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 data items visible and properly sized
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 departments with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format with spec-id, library, and pyplots.ai
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows hierarchy (department→project), varying sizes, but nesting
+          depth could be more visually distinct
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a realistic treemap use case with plausible
+          values
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values ($8M-$45M) are realistic for department allocations
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 1
+        max: 3
+        passed: false
+        comment: No random seed, but data is deterministic (partial credit)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib.pyplot, matplotlib.patches)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but would be better with consistent naming
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Implements custom squarify algorithm with matplotlib patches, showing
+          flexibility but not using advanced features
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/plotly.yaml b/plots/treemap-basic/metadata/plotly.yaml
index 0bed4d71d6..ac2200b202 100644
--- a/plots/treemap-basic/metadata/plotly.yaml
+++ b/plots/treemap-basic/metadata/plotly.yaml
@@ -21,3 +21,172 @@ review:
   weaknesses:
   - Root node Budget label appears small and could be more prominent
   - No units shown in the values (should indicate thousands)
+  image_description: 'The plot displays a treemap showing budget allocation by department
+    and project (in thousands). The visualization uses five distinct colors for main
+    categories: Engineering (dark blue #306998), Sales (green #2CA02C), Marketing
+    (yellow #FFD43B), Operations (purple #9467BD), and HR (pink #E377C2). Each main
+    category contains nested subcategories with their corresponding values. The layout
+    is hierarchical with "Budget" as the root node, five department categories as
+    children, and individual projects as leaf nodes. White borders (2px) clearly delineate
+    hierarchy boundaries between rectangles. All labels display both the name and
+    numeric value. The title "treemap-basic · plotly · pyplots.ai" is centered at
+    the top. The treemap fills the canvas well with Engineering taking the largest
+    portion (1150 total), followed by Sales (640), Marketing (520), Operations (280),
+    and HR (190).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text clearly readable with font size 24, title at 32pt
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles well-sized proportional to values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette with distinct hues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Treemap fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for treemaps (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for treemaps (hierarchy shown via nesting)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories, subcategories, and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Nested rectangles, area proportional to value, distinct category
+          colors, borders between rectangles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible and accessible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Categories self-labeled via color coding
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: treemap-basic · plotly · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows varied hierarchy depths, different sized categories, multiple
+          subcategories per parent
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a classic treemap use case
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: false
+        comment: Values are sensible but lack explicit unit display in labels (-2)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png but also plot.html (minor)
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Uses go.Treemap with branchvalues, hover templates with percentParent,
+          and HTML export
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/plotnine.yaml b/plots/treemap-basic/metadata/plotnine.yaml
index 015bb8568e..4e05413274 100644
--- a/plots/treemap-basic/metadata/plotnine.yaml
+++ b/plots/treemap-basic/metadata/plotnine.yaml
@@ -25,3 +25,167 @@ review:
     be "treemap-basic · plotnine · pyplots.ai" only
   - Labels use black text which works for lighter colors but could be harder to read
     on darker Python Blue rectangles
+  image_description: 'The treemap displays budget allocation across 6 departments
+    using nested rectangles. Colors used: Python Blue (#306998) for Engineering (R&D,
+    Infrastructure, QA), Python Yellow (#FFD43B) for Marketing (Digital, Events),
+    Teal (#4ECDC4) for Sales (Direct, Channel, Support), Coral (#FF6B6B) for Operations
+    (Logistics, Facilities), Light Green (#95E1A3) for HR (Recruiting), and Plum (#DDA0DD)
+    for Finance (Accounting). Each rectangle shows subcategory name and value (e.g.,
+    "R&D $450K"). The title "Budget Allocation by Department · treemap-basic · plotnine
+    · pyplots.ai" appears at top. A department legend is positioned on the right.
+    White borders clearly separate rectangles. The squarified layout algorithm produces
+    well-proportioned rectangles filling the canvas appropriately.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and bold, labels are clearly readable with bold text
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fit within their rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangles are well-sized, proportional to values
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colorblind-safe palette with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, slight gap between plot and legend
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well placed on right, no grid needed for treemap
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap with nested rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category, subcategory, and value correctly mapped to color, labels,
+          and area
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has distinct colors, labels, white borders; hierarchy shown via color
+          grouping (spec suggested nesting depth or shading intensity as alternative)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 data items visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all departments
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but title includes extra descriptive text before
+          the required format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows variation in rectangle sizes, but all rectangles are similar
+          aspect ratios
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values ($80K-$450K) are realistic for departmental budgets
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Mostly KISS but squarified algorithm adds complexity (necessary for
+          treemap)
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Data is deterministic, no random seed needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of geom_rect + geom_text to build treemap, leverages
+          plotnine grammar of graphics with custom layout algorithm
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/pygal.yaml b/plots/treemap-basic/metadata/pygal.yaml
index 82080172b0..4655962928 100644
--- a/plots/treemap-basic/metadata/pygal.yaml
+++ b/plots/treemap-basic/metadata/pygal.yaml
@@ -25,3 +25,169 @@ review:
     displayed on the rectangles - only values are shown
   - Colors could be more optimized for colorblind accessibility (current palette relies
     somewhat on hue differences)
+  image_description: 'The treemap displays a budget allocation visualization with
+    4 main department categories represented as large colored rectangles, each containing
+    3 subcategory rectangles. The colors used are: steel blue for Engineering, golden
+    yellow for Marketing, coral pink for Sales, and teal/cyan for Operations. Each
+    rectangle shows its budget value formatted as currency (e.g., "$450K", "$320K").
+    The rectangles are sized proportionally to their values - Engineering''s R&D ($450K)
+    and Marketing''s Digital ($320K) are the largest. The title "Budget Allocation
+    · treemap-basic · pygal · pyplots.ai" appears at the top. A legend at the bottom
+    shows all 4 department categories. The layout fills the canvas well with clear
+    visual hierarchy.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and values are clearly readable; font sizes are appropriate
+          for the canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; values are well-positioned within rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: All rectangles clearly visible with good proportional sizing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color choices with sufficient contrast; colors are distinguishable
+          but could be more colorblind-optimized
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent canvas utilization; treemap fills most of the plot area
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom with 4 columns matching categories
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization with nested rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to rectangle areas
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows hierarchy, distinct colors per category, labels on rectangles
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data values visible and correctly represented
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all 4 departments
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Budget Allocation · treemap-basic · pygal
+          · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows hierarchical structure with categories and subcategories; however,
+          subcategory labels (R&D, Infrastructure, etc.) are not visible in the rendered
+          output - only values shown
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a plausible and common business
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Budget values in $K range (110-450) are realistic for departmental
+          allocations
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → style → chart → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but no explicit seed needed since no randomization
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's Treemap class, custom Style, value_formatter for currency
+          display, legend positioning; could leverage more advanced features
+  verdict: APPROVED
diff --git a/plots/treemap-basic/metadata/seaborn.yaml b/plots/treemap-basic/metadata/seaborn.yaml
index 504bf119e0..e06468cd87 100644
--- a/plots/treemap-basic/metadata/seaborn.yaml
+++ b/plots/treemap-basic/metadata/seaborn.yaml
@@ -22,3 +22,175 @@ review:
   weaknesses:
   - Uses strict=True in zip() which is Python 3.10+ specific and unnecessary for this
     use case
+  image_description: 'The treemap displays a "Budget Allocation by Department" visualization
+    with 12 nested rectangles representing subcategories across 5 main departments.
+    The largest rectangle (Product Dev $45M) is in blue (Engineering), followed by
+    Enterprise ($35M) in golden/orange (Sales). The color scheme uses a colorblind-safe
+    palette: blue for Engineering, golden/orange for Sales, teal/green for Marketing,
+    orange for Operations, and pink for HR. Each rectangle is labeled with the subcategory
+    name and dollar value in millions. Rectangles within the same category show depth
+    shading (lighter shades for smaller values). White borders separate all rectangles,
+    and a horizontal legend at the bottom identifies the 5 departments. The title
+    follows the required format with "treemap-basic · seaborn · pyplots.ai" suffix.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'All labels readable, title is 24pt bold, subcategory labels scale
+          appropriately. Minor: some smaller rectangles (Training $8M) have slightly
+          tight spacing'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fit within their rectangles
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Rectangle sizes clearly distinguish value differences, alpha=0.9
+          provides good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses seaborn's colorblind palette, distinct hues for all 5 categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space, treemap fills most of the area, legend
+          positioned below. Minor padding imbalance on right side
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed below plot with good spacing, ncol=5 keeps
+          it compact
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct treemap visualization showing hierarchical data as nested
+          rectangles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly determine rectangle area (Product Dev largest at
+          $45M)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct category colors, labels for all rectangles, borders between
+          rectangles, hierarchy shown through color shading
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 12 items visible with appropriate sizing
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly maps colors to department names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{title} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows hierarchy (category → subcategory), varying sizes, depth shading.
+          Minor: could benefit from showing a more extreme size contrast'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation by department is a real, relatable business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values in $M range are realistic for corporate budgets, though total
+          of ~$265M is somewhat large for a single organization
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → processing → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (matplotlib, numpy, seaborn, squarify, patches)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses `strict=True` in zip which requires Python 3.10+, not strictly
+          deprecated but adds unnecessary constraint
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png' with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style("white"), sns.color_palette("colorblind"), and
+          sns.light_palette for depth shading. However, seaborn doesn't have a native
+          treemap function, so squarify does the heavy lifting. The seaborn usage
+          is primarily for styling and color palette generation.
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/bokeh.yaml b/plots/venn-basic/metadata/bokeh.yaml
index 8ee9f86591..bc1dcc481a 100644
--- a/plots/venn-basic/metadata/bokeh.yaml
+++ b/plots/venn-basic/metadata/bokeh.yaml
@@ -24,3 +24,172 @@ review:
   - Set size labels (n=100, n=80, n=60) defined in code but not rendering visibly
     in output image
   - Hover tooltips could display actual region data instead of generic instructions
+  image_description: 'The plot displays a 3-set Venn diagram with three overlapping
+    elliptical shapes on a white background. Product A (blue, top-left) shows 60 in
+    its exclusive region, Product B (yellow, top-right) shows 35, and Product C (green,
+    bottom-center) shows 25. The intersection regions display: A∩B=20 (olive/greenish
+    overlap at top-center), A∩C=10 (teal overlap at left), B∩C=15 (light green overlap
+    at right), and the triple intersection A∩B∩C=10 (darkest green at center). The
+    title "venn-basic · bokeh · pyplots.ai" appears at the top. Set labels "Product
+    A", "Product B", and "Product C" are positioned outside their respective circles.
+    The diagram uses transparency effectively to show overlapping regions with distinct
+    colors.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is readable; title and labels are clear, though region numbers
+          could be slightly larger for optimal visibility
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap; all labels and numbers are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized and fill the canvas appropriately; all regions
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green palette is colorblind-friendly; overlapping
+          regions create distinct hues
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; slight asymmetry with more whitespace at
+          bottom, but overall well-balanced
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present showing set totals (n=100, n=80, n=60) - the code
+          has them but they're not visible in the output
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3-set Venn diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All set relationships correctly represented
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows counts in each region; missing the total set sizes (n=XX) that
+          the code intended to display
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 7 regions shown with correct values
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Set labels present but total sizes not visible
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "venn-basic · bokeh · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: exclusive regions, pairwise overlaps, triple
+          overlap'
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Product feature comparison is plausible and neutral; could be more
+          specific
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic and mathematically consistent (inclusion-exclusion
+          verified)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Flat structure with inline loop, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: No random seed needed (deterministic data), but slightly less explicit
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses HoverTool for interactivity, patch rendering for circles, Label
+          model for text; could leverage more Bokeh features like linked selections
+          or custom tooltips with actual data
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/highcharts.yaml b/plots/venn-basic/metadata/highcharts.yaml
index 0b6e5d7e68..c16b61af26 100644
--- a/plots/venn-basic/metadata/highcharts.yaml
+++ b/plots/venn-basic/metadata/highcharts.yaml
@@ -24,3 +24,168 @@ review:
   weaknesses:
   - Legend does not explicitly map colors to set names (Highcharts Venn limitation)
   - Very slight bottom edge clipping visible in the image
+  image_description: 'The plot displays a three-circle Venn diagram showing "Developer
+    Skill Distribution (Team of 150)". The three overlapping circles represent Backend
+    (blue, #306998), Frontend (yellow, #FFD43B), and DevOps (purple, #9467BD) developers.
+    Each region displays labels with the set name and value: Backend: 100, Frontend:
+    80, DevOps: 60. Intersection labels show Backend∩Frontend: 30, Backend∩DevOps:
+    20, DevOps∩Frontend: 15, and the triple intersection Backend∩DevOps∩Frontend:
+    10. The title "venn-basic · highcharts · pyplots.ai" appears at the top with a
+    subtitle. The diagram fills the canvas well with good proportions. Colors show
+    transparency in overlapping regions.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All labels readable, title and data labels clear, though intersection
+          labels could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlap issues, all labels positioned clearly within their
+          regions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized, overlapping regions clearly visible with
+          transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple) - no red-green
+          conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, diagram centered with balanced whitespace, minor
+          bottom clipping visible
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend enabled and appropriately styled
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Venn diagram type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Sets and intersections correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows overlaps with counts; area appears somewhat proportional but
+          not strictly so
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All values displayed correctly
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend enabled but doesn't show set colors explicitly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "venn-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 3 sets with all pairwise and triple intersections; good demonstration
+          of Venn features
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Developer skill distribution is a relatable, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic (team of 150, with reasonable overlap numbers)
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → chart setup → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (no randomness); no explicit seed is needed here
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html correctly
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses VennSeries, proper Highcharts data structure, interactive HTML
+          export, data labels with formatting
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/letsplot.yaml b/plots/venn-basic/metadata/letsplot.yaml
index 688f7dd04c..4d37a60b60 100644
--- a/plots/venn-basic/metadata/letsplot.yaml
+++ b/plots/venn-basic/metadata/letsplot.yaml
@@ -24,3 +24,173 @@ review:
   - File output requires os workaround to move files from lets-plot-images subdirectory
   - Could use geom_path instead of geom_polygon with size parameter for circle borders
     (minor)
+  image_description: 'The plot displays a classic 3-circle Venn diagram showing overlapping
+    research fields: "Machine Learning" (blue, top-left), "Statistics" (yellow, top-right),
+    and "Data Engineering" (pink, bottom). Each circle has clear set labels positioned
+    outside the circles. The exclusive region counts are displayed within each area:
+    45 (ML only), 35 (Statistics only), 30 (Data Engineering only). Pairwise intersections
+    show: 25 (ML & Statistics), 15 (ML & Data Engineering), 20 (Statistics & Data
+    Engineering). The center triple intersection shows 10. The title "venn-basic ·
+    lets-plot · pyplots.ai" appears at the top. The circles use transparency (alpha
+    ~0.35) creating distinct visual blending in overlap regions. White borders separate
+    the circles. The layout is square (1:1 aspect ratio) with good canvas utilization.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text clearly readable; numbers and labels are bold and well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all counts and labels are clearly positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are large and well-sized, overlap regions clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/pink palette is colorblind-friendly with good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization; Venn diagram fills significant portion of
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for Venn diagrams (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No legend shown (acceptable since labels are on circles); theme_void
+          appropriate
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3-circle Venn diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Set labels, sizes, and intersections all correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 sets, overlapping circles, counts in
+          each region, transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All values visible and correctly positioned
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Set labels clearly identify each circle
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "venn-basic · lets-plot · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows exclusive, pairwise, and triple overlaps
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Research fields/skills overlap is a plausible, neutral scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic (45, 35, 30, 25, 20, 15, 10)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: numpy is imported but used only for the seed call (acceptable);
+          pandas used appropriately
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: File handling with os.rename adds complexity; saves plot.png correctly
+          but via workaround
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_polygon, geom_text, theme_void, scale_fill_manual;
+          creative approach to Venn using polygon geometry
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/matplotlib.yaml b/plots/venn-basic/metadata/matplotlib.yaml
index f8102ceb66..7ce7c94c95 100644
--- a/plots/venn-basic/metadata/matplotlib.yaml
+++ b/plots/venn-basic/metadata/matplotlib.yaml
@@ -27,3 +27,177 @@ review:
   - Could add a brief subtitle or annotation explaining the data context
   - The yellow JavaScript circle could benefit from a slightly darker shade for better
     contrast against white background
+  image_description: 'The plot shows a classic three-circle Venn diagram with overlapping
+    circles representing Python (blue, #306998), JavaScript (yellow, #FFD43B), and
+    SQL (cyan, #4ECDC4). The title "venn-basic · matplotlib · pyplots.ai" is displayed
+    at the top in bold black text. Each set is labeled with bold text (Python top-left,
+    JavaScript top-right, SQL bottom-center). Each region displays its count in bold:
+    Python-only (40), JavaScript-only (25), SQL-only (15), Python+JavaScript overlap
+    (20), Python+SQL overlap (10), JavaScript+SQL overlap (15), and the central triple
+    overlap (10). The circles have 0.6 alpha transparency, creating distinct blended
+    colors in overlapping regions. The plot uses a square 12x12 figure with a white
+    background.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at fontsize 28, set labels at 24pt, subset numbers at 20pt
+          - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlaps; all numbers and labels are well-positioned within
+          their regions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized and fill the canvas appropriately; alpha=0.6
+          provides good visibility of overlaps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue, yellow, and cyan are distinguishable; however yellow on white
+          could have slightly better contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format (12x12) is appropriate for Venn diagram; good use of
+          canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Venn diagrams (no axes), but set labels are present and clear
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Venn diagrams; clean white background is appropriate
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Venn diagram with 3 overlapping circles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: All 7 regions correctly mapped with exclusive counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows set labels, counts in each region, overlapping areas with transparency
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible; numbers range appropriately from 10-40
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Set labels (Python, JavaScript, SQL) clearly identify each circle
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "venn-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows all 7 regions of a 3-set Venn; good variety in overlap sizes;
+          could show more variation between set sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Developer survey about programming language proficiency is a realistic,
+          neutral, and relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are plausible for a survey; total ~135 respondents is reasonable
+          though exact proportions are somewhat arbitrary
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded values); no random seed statement
+          is present because none is needed
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and matplotlib_venn are imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib_venn API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Correctly saves as plot.png with dpi=300 and bbox_inches="tight"
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses matplotlib_venn extension library effectively with venn3 function;
+          styling through set_labels and subset_labels customization
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/plotly.yaml b/plots/venn-basic/metadata/plotly.yaml
index fe8c1fad00..fbdb000a9f 100644
--- a/plots/venn-basic/metadata/plotly.yaml
+++ b/plots/venn-basic/metadata/plotly.yaml
@@ -24,3 +24,173 @@ review:
   weaknesses:
   - Square format (1200x1200 scaled to 3600x3600) deviates from recommended 1600x900
     landscape in library rules (minor, justified for this plot type)
+  image_description: 'The plot displays a 3-circle Venn diagram on a white background.
+    The three circles represent Python (blue, top), JavaScript (yellow, bottom-left),
+    and SQL (green, bottom-right). Each circle is filled with semi-transparent color
+    (approx. 50% opacity) allowing overlap regions to be visible. Set labels appear
+    outside each circle with total counts in parentheses: Python (100), JavaScript
+    (80), SQL (60). Each region displays its count: Python only (60), JavaScript only
+    (35), SQL only (25), Python & JavaScript (20), Python & SQL (10), JavaScript &
+    SQL (15), and All three (10) in the center. The title "venn-basic · plotly · pyplots.ai"
+    appears at the top. The diagram uses an equilateral triangle arrangement with
+    good spacing.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title at 36pt, set labels at 28pt,
+          region counts at 24pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels and counts are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized and overlapping regions are clearly visible
+          with transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, and green are colorblind-safe and distinguishable
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, diagram is well-centered with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for Venn diagrams (no axes), awarding 0 as criteria doesn't apply
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid needed; legend is disabled appropriately since labels are
+          on diagram
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct Venn diagram with 3 overlapping circles
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Set sizes and overlaps correctly mapped to regions
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 sets, labels, counts in each region,
+          transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data regions displayed with accurate counts
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Set labels are correct and positioned outside circles
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "venn-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows all 7 regions of a 3-set Venn; minor: doesn''t demonstrate
+          2-set variant'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language preferences is a real, neutral, relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Survey of 100/80/60 users with reasonable overlap percentages
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save pattern, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random, hardcoded values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with fill="toself" for circles, annotations for labels,
+          proper layout configuration, and outputs both PNG and interactive HTML
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/plotnine.yaml b/plots/venn-basic/metadata/plotnine.yaml
index 86a5b9ceda..f5987ba62d 100644
--- a/plots/venn-basic/metadata/plotnine.yaml
+++ b/plots/venn-basic/metadata/plotnine.yaml
@@ -24,3 +24,176 @@ review:
   - 'Two of the three colors are shades of blue (#306998 and #4B8BBE), which could
     be confusing for colorblind users'
   - Figure size is 12x12 (square) instead of 16x9 as recommended in library guidelines
+  image_description: 'The plot displays a classic three-circle Venn diagram showing
+    the overlap between Python Developers, Data Scientists, and ML Engineers. The
+    three circles are arranged in a triangular formation with significant overlap
+    in the center. Colors used are a muted blue (#306998) for Python Developers, yellow
+    (#FFD43B) for Data Scientists, and a lighter blue (#4B8BBE) for ML Engineers.
+    Each region displays its count: 60 (Python only), 35 (Data Scientists only), 25
+    (ML Engineers only), 20 (Python ∩ Data Scientists), 10 (Python ∩ ML), 15 (Data
+    Scientists ∩ ML), and 10 (all three). Set labels are positioned outside the circles.
+    The title "venn-basic · plotnine · pyplots.ai" is centered at the top. The background
+    is clean white with no axes or grid lines visible.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All numbers and labels are clearly readable with appropriate font
+          sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all region counts are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized with good transparency (alpha=0.45) showing
+          overlaps clearly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinguishable but two shades of blue could be confusing
+          for some users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas but slightly more space at bottom than top
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for Venn diagrams, axes correctly hidden
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend hidden as labels are on plot, but no legend explaining what
+          counts mean
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct three-circle Venn diagram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Set memberships and intersections correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 3 sets, overlaps, counts in regions,
+          distinct colors with transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All regions visible and counts displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Set labels positioned correctly outside circles
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "venn-basic · plotnine · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows all 7 regions of a 3-set Venn but all sets use similar-ish
+          data ranges
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech team skills overlap is a plausible, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Numbers (60, 35, 25, etc.) are realistic for a team/organization
+          context
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random), but no explicit seed for any potential
+          randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics well with geom_polygon and geom_text,
+          but Venn diagrams are not a native strength of plotnine
+  verdict: APPROVED
diff --git a/plots/venn-basic/metadata/seaborn.yaml b/plots/venn-basic/metadata/seaborn.yaml
index 6f4a5f738a..a86caf7c54 100644
--- a/plots/venn-basic/metadata/seaborn.yaml
+++ b/plots/venn-basic/metadata/seaborn.yaml
@@ -25,3 +25,177 @@ review:
     labels)
   - Limited use of seaborn-specific features (seaborn has no native Venn support,
     so matplotlib patches are required)
+  image_description: 'The plot displays a three-set Venn diagram with overlapping
+    circles arranged in an equilateral triangle layout. The top circle (mint/seafoam
+    green) represents Python users (n=100), the bottom-left circle (peach/salmon)
+    represents JavaScript users (n=80), and the bottom-right circle (light blue/lavender)
+    represents SQL users (n=60). Each region displays count values with percentages
+    in white rounded boxes: Python-only=60, JavaScript-only=35, SQL-only=25, Python∩JavaScript=20,
+    Python∩SQL=10, JavaScript∩SQL=15, and the central intersection (all three)=10.
+    The title "venn-basic · seaborn · pyplots.ai" appears at the top in bold black
+    text. Set labels with sample sizes are positioned outside their respective circles.
+    Colors are from the Set2 palette with 40% transparency, allowing overlap regions
+    to be visible through color blending.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is large and clearly readable: title at 24pt, set labels
+          at 22pt bold, region counts at 20pt bold with white background boxes'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; all labels and numbers are well-separated and
+          clearly positioned in their respective regions
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Circles are well-sized, overlap regions clearly visible through transparency
+          (alpha=0.4), white boxes make counts readable against any background
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses Set2 colorblind-safe palette; green, peach, and blue are distinguishable
+          to colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of square canvas (12x12) for symmetric diagram; circles
+          well-centered but slight asymmetry in bottom label positions (-0.3 adjustments)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean white background with no grid (appropriate), no traditional
+          legend needed as labels are inline
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correctly implements a 3-set Venn diagram as specified
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Set sizes and intersections correctly calculated and displayed; inclusion-exclusion
+          principle properly applied
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec requirements met: 3 sets, overlapping circles, counts/percentages
+          in regions, distinct colors with transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data displayed; total respondents calculated correctly (175)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Set labels with sizes (n=X) clearly identify each circle
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "venn-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all Venn diagram features: exclusive regions, pairwise overlaps,
+          and triple intersection'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Developer survey on programming languages is plausible and neutral,
+          but generic; subtitle adds context nicely
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for a developer survey (100, 80, 60 respondents
+          with sensible overlaps)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → seaborn styling → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: false
+        comment: numpy imported but only used for seed and trigonometry; could argue
+          seed isn't random data generation
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses strict=True in zip which is Python 3.10+ feature; acceptable
+          but non-standard
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: false
+        comment: Uses sns.set_theme() and sns.color_palette() for styling, but Venn
+          diagrams are drawn with matplotlib patches (no native seaborn Venn support);
+          this is acceptable given seaborn lacks Venn diagram functions
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/altair.yaml b/plots/violin-basic/metadata/altair.yaml
index 99935a0b74..441557153e 100644
--- a/plots/violin-basic/metadata/altair.yaml
+++ b/plots/violin-basic/metadata/altair.yaml
@@ -25,3 +25,177 @@ review:
   - Color alternating pattern (2 colors for 4 categories) does not add meaningful
     category distinction
   - Could benefit from adding tooltips for interactivity
+  image_description: 'The plot displays four violin plots showing salary distributions
+    by department (Engineering, Marketing, Sales, Support). The violins are colored
+    in a blue-yellow alternating pattern (#306998 blue for Engineering and Sales,
+    #FFD43B yellow for Marketing and Support). Each violin shows the kernel density
+    estimation shape of the salary distribution. Inside each violin, there is a black
+    vertical line representing the interquartile range (IQR) and a white dot with
+    black border marking the median. The y-axis shows "Salary ($)" ranging from 10,000
+    to 140,000. Department names appear below each violin. The title "violin-basic
+    · altair · pyplots.ai" is centered at the top. The Sales violin clearly shows
+    a bimodal distribution (two bulges). All text is clearly readable with appropriate
+    font sizes.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable with
+          appropriate font sizes
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins are well-sized with good opacity (0.75), quartile markers
+          and median points clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, but only uses 2 colors for
+          4 categories (alternating pattern)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with faceted design, though spacing could be slightly
+          tighter
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Salary ($)" includes units, department names are descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No grid lines visible on plot, no legend (colors alternate but don't
+          distinguish categories meaningfully)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with kernel density estimation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows quartile markers (IQR line), median point, mirrored density
+          on both sides
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed as categories are labeled directly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "violin-basic · altair · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent! Shows varied distribution shapes: normal (Engineering,
+          Marketing, Support), bimodal (Sales), different spreads'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a perfect real-world scenario for violin
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary ranges (35K-140K) are realistic for the departments shown
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Code works but uses some complex transform patterns
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's transform_density, layering, and faceting well, though
+          could use more interactive features
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/bokeh.yaml b/plots/violin-basic/metadata/bokeh.yaml
index 1c015794cb..a849519e49 100644
--- a/plots/violin-basic/metadata/bokeh.yaml
+++ b/plots/violin-basic/metadata/bokeh.yaml
@@ -24,3 +24,176 @@ review:
   weaknesses:
   - Y-axis uses scientific notation (2.000e+4) instead of human-readable currency
     format
+  image_description: The plot displays four violin plots showing salary distributions
+    across four departments (Engineering, Marketing, Sales, Support). Each violin
+    uses alternating Python blue (#306998) and yellow (#FFD43B) colors. The violins
+    are mirrored/symmetrical showing kernel density estimation on both sides. Inside
+    each violin is a white box showing quartiles (Q1-Q3), a horizontal black median
+    line, and whiskers extending to 1.5×IQR. The title "violin-basic · bokeh · pyplots.ai"
+    appears at the top left. X-axis shows "Department" with category labels, Y-axis
+    shows "Annual Salary (USD)" in scientific notation (2.000e+4 to 1.400e+5). Grid
+    lines are subtle and dashed on the y-axis only. Each department shows distinct
+    distribution shapes - Engineering has tight distribution around 85k, Marketing
+    around 65k, Sales shows wider spread around 70k, and Support is narrower around
+    50k.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are readable, though scientific
+          notation on y-axis is slightly harder to interpret than formatted currency
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins are well-sized, box plots visible inside, whiskers clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are colorblind-safe, good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, slight extra whitespace on right side
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Annual Salary (USD)", "Department"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed, but y-grid uses scientific notation which reduces
+          readability
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with KDE
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has quartile markers, mirrored density, median line as per spec
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A (no legend needed, categories clear on x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "violin-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes (Engineering tight, Sales wide,
+          Support narrow), different means, demonstrates violin plot strengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a real, comprehensible scenario mentioned
+          in spec applications
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary values are realistic (50k-85k means with appropriate standard
+          deviations)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Implements violin from scratch using patches rather than using Bokeh's
+          built-in capabilities; while technically correct, doesn't showcase Bokeh-specific
+          features
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/highcharts.yaml b/plots/violin-basic/metadata/highcharts.yaml
index 9e28e25be6..2b9d6873de 100644
--- a/plots/violin-basic/metadata/highcharts.yaml
+++ b/plots/violin-basic/metadata/highcharts.yaml
@@ -27,3 +27,188 @@ review:
   - The IQR boxes are very thin (box_width=0.05) and could be slightly wider for better
     visibility
   - Grid lines could be even more subtle (currently alpha=0.15, could use 0.1)
+  image_description: 'The plot displays four violin shapes representing Study Groups
+    A, B, C, and D on a white background. Each violin uses a distinct colorblind-safe
+    color: blue (#306998) for Group A, yellow (#FFD43B) for Group B, purple (#9467BD)
+    for Group C, and cyan (#17BECF) for Group D. Each violin features a centered black
+    IQR (interquartile range) box and a red diamond marker indicating the median.
+    The title "violin-basic · highcharts · pyplots.ai" appears at the top in bold.
+    The Y-axis labeled "Test Score (points)" ranges from 0-130, and the X-axis labeled
+    "Study Group" shows the four category labels. A legend at the bottom identifies
+    each group color and the median marker. The violins effectively demonstrate different
+    distribution shapes: Group A shows a symmetric normal distribution, Group B displays
+    clear bimodality with two bulges, Group C is normally distributed, and Group D
+    exhibits right-skewed exponential distribution with a long upper tail reaching
+    ~120.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size. Font sizes are appropriately scaled for 4800x2700 resolution.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Category labels, axis titles, and legend
+          are all clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violin shapes are clearly visible with good fill opacity. IQR boxes
+          and median markers are well-sized and prominent.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan) - no red-green
+          conflicts.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good overall layout with appropriate margins. Slight excess whitespace
+          at top of chart above Group D's extended distribution.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Test Score (points)" and "Study
+          Group".'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle. Legend is positioned at bottom but could be placed
+          more optimally.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot type with kernel density estimation shown via
+          polygon shapes.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Shows quartile markers (IQR box), median line (diamond marker), mirrored
+          density on both sides. Split violin comparison not shown but spec says "consider"
+          not required.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within the axis ranges (0-130 appropriately covers
+          all distributions).
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all groups and median marker.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Excellent variety showing normal (A, C), bimodal (B), and skewed
+          (D) distributions. Demonstrates the plot type's ability to reveal distribution
+          shapes.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores by study group is a plausible, real-world scenario that
+          makes sense for comparing distributions.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores in 0-130 range with most values in 20-100 range are realistic
+          and sensible.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Follows imports → data → plot → save structure, but code is more
+          complex than strictly necessary due to manual polygon construction for violins.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html correctly.
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of PolygonSeries and ScatterSeries to construct custom violin
+          shapes. Uses highcharts-more.js for polygon support. Interactive HTML output
+          included.
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/letsplot.yaml b/plots/violin-basic/metadata/letsplot.yaml
index bd757e0be4..f7ec58c68d 100644
--- a/plots/violin-basic/metadata/letsplot.yaml
+++ b/plots/violin-basic/metadata/letsplot.yaml
@@ -27,3 +27,178 @@ review:
     confusion about category groupings
   - Star import (from lets_plot import *) is functional but less explicit than named
     imports
+  image_description: 'The plot displays four violin shapes representing salary distributions
+    across four departments: Engineering, Marketing, Sales, and Design. The violins
+    are colored in a two-color palette alternating between blue (#306998) and yellow
+    (#FFD43B). Each violin shows the kernel density estimation with mirrored sides,
+    and horizontal lines at three positions (25th percentile, median, and 75th percentile)
+    are visible within each violin. The title "violin-basic · lets-plot · pyplots.ai"
+    appears at the top. The y-axis shows "Salary ($)" ranging from 0 to 170,000, and
+    the x-axis shows "Department" with the four category labels. The layout uses a
+    minimal theme with a white background and subtle gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          size with appropriate font sizes (title 24, labels 20, ticks 16)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins are appropriately sized and visible with good alpha (0.8)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe; however, using only 2 colors
+          for 4 categories makes adjacent categories share colors
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, appropriate whitespace, nothing cut off
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Salary ($)" with units, X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and appropriate; legend correctly hidden since x-axis
+          shows categories; minimal theme works well
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has quartile markers (25th, median, 75th), mirrored density on both
+          sides, median line. Does not show split violins for comparing two conditions
+          (noted as "consider" in spec)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden since x-axis shows categories
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Title is "violin-basic · lets-plot · pyplots.ai" but should use "letsplot"
+          not "lets-plot" per library name convention
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes (Engineering peaked high, Sales
+          wide spread, Marketing moderate), but distributions could show more distinct
+          shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary distributions by department is a real, comprehensible scenario
+          matching spec application "Comparing salary distributions across job titles"
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary values ($30k-$170k) are realistic for professional roles
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure without functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used; noqa comments for star import are appropriate but
+          star import is not ideal
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.html)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar, quantiles parameter, quantile_lines,
+          scale_fill_manual, theme customization. Could leverage more lets-plot specific
+          features like interactivity options or additional styling.
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/matplotlib.yaml b/plots/violin-basic/metadata/matplotlib.yaml
index c8f069f4dd..d1b2efbf1a 100644
--- a/plots/violin-basic/metadata/matplotlib.yaml
+++ b/plots/violin-basic/metadata/matplotlib.yaml
@@ -24,3 +24,177 @@ review:
   - Could use matplotlib built-in quartile visualization (showquartiles parameter)
     instead of manual vlines
   - Missing legend element (though not critical for this plot type)
+  image_description: The plot displays four violin plots comparing test score distributions
+    across four schools (School A, B, C, D). The violins are rendered in a muted blue
+    color (#306998) with slight transparency (alpha 0.7). Each violin shows the kernel
+    density estimation shape, with School C being the widest (indicating highest variance)
+    and School B being narrower (lower variance). Yellow horizontal lines mark the
+    median values inside each violin. Dark blue vertical bars indicate the interquartile
+    range (Q1 to Q3). Vertical lines extend to min/max values. The title "violin-basic
+    · matplotlib · pyplots.ai" appears at the top. X-axis is labeled "School" and
+    Y-axis is labeled "Test Score (points)". A subtle horizontal dashed grid is present.
+    The layout is well-balanced with good proportions.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes clearly visible, appropriate alpha for density visualization
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Python blue is colorblind-safe, good contrast, but single color palette
+          is simple
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent proportions, no cut-off, good whitespace
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Test Score (points)" and "School" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle at alpha=0.3, but no legend present (not strictly
+          needed for this plot type)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with kernel density estimation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows quartile markers (IQR bars), mirrored density on both sides,
+          median line (yellow)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, y-axis shows full range ~35-115
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed as categories are on x-axis
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes (narrow vs wide), different centers
+          (68-82), different spreads. Could show more extreme outliers or bimodal
+          distributions.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores across schools is a perfect, comprehensible real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Test scores in 35-115 range are reasonable, though some exceed 100
+          which is unusual for typical test scoring
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) ensures reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses basic violinplot() but doesn't leverage matplotlib's more advanced
+          violin customization like bw_method for bandwidth, widths parameter, or
+          vert parameter. The manual IQR overlay is a good touch but could use matplotlib's
+          built-in showquartiles parameter.
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/plotly.yaml b/plots/violin-basic/metadata/plotly.yaml
index 3bdad84980..41d8071890 100644
--- a/plots/violin-basic/metadata/plotly.yaml
+++ b/plots/violin-basic/metadata/plotly.yaml
@@ -26,3 +26,162 @@ review:
   - Could leverage more Plotly-specific features like custom hover templates
   - Data could include one bimodal distribution to better showcase violin plot capabilities
     over box plots
+  image_description: 'The plot displays four violin plots representing salary distributions
+    across four departments: Engineering, Marketing, Sales, and Support. Each violin
+    shows a mirrored kernel density estimation with an embedded white box plot showing
+    quartiles and a dashed median line. The color scheme alternates between blue (#306998,
+    #4B8BBE) for Engineering and Sales, and yellow (#FFD43B, #FFE873) for Marketing
+    and Support. The title "violin-basic · plotly · pyplots.ai" is centered at the
+    top. The y-axis shows "Annual Salary ($)" ranging from ~20,000 to ~140,000 with
+    comma-formatted tick labels. The x-axis shows "Department" with the four category
+    labels. The layout uses a clean white template with subtle gridlines. Sales shows
+    the widest distribution (highest variance), while Support shows the narrowest
+    and lowest salary range.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, ticks at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes clearly visible with appropriate opacity (0.7), box
+          plots well-sized
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette is colorblind-safe, though two similar blues
+          and two similar yellows reduce distinction slightly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper margins, well-centered
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Annual Salary ($)", X-axis is descriptive "Department"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has quartile markers (box), mirrored density, median line (meanline)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this plot type (categories labeled on axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "violin-basic · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes and spreads well; Sales has high
+          variance, Support has low variance and lower mean - demonstrates violin
+          advantages. Could show more multimodality.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary by department is a perfect realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Salary values are realistic for US departments; Support slightly
+          low but plausible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/plotnine.yaml b/plots/violin-basic/metadata/plotnine.yaml
index 0c20a04854..b62d490c90 100644
--- a/plots/violin-basic/metadata/plotnine.yaml
+++ b/plots/violin-basic/metadata/plotnine.yaml
@@ -26,3 +26,177 @@ review:
   - All distributions are normal - adding one skewed or bimodal distribution would
     better demonstrate violin plot capabilities
   - Axis labels lack context or units (e.g., Test Score instead of Value)
+  image_description: 'The plot displays four violin plots arranged horizontally, each
+    representing a different category (Group A, Group B, Group C, Group D). The violins
+    show kernel density estimations with mirrored sides, and each contains three horizontal
+    lines indicating the 25th, 50th (median), and 75th percentiles. The color scheme
+    alternates between blue (#306998, #4B8BBE) for Groups A and C, and yellow (#FFD43B,
+    #FFE873) for Groups B and D. The title "violin-basic · plotnine · pyplots.ai"
+    appears at the top in a large font. The x-axis is labeled "Category" and the y-axis
+    "Value". The distributions vary visibly: Group A is centered around 50, Group
+    B around 65 with wider spread, Group C is narrower around 45, and Group D around
+    70 with moderate spread. The minimal theme provides a clean white background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear, axis labels and tick labels are all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all category labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins are well-sized and clearly visible, quantile lines are visible
+          but could be slightly more prominent
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/yellow palette has good contrast and is distinguishable, though
+          could use a more standard colorblind-safe palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions with appropriate whitespace, 16:9 aspect ratio used
+          effectively
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Category", "Value") but lack units or context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No visible grid (minimal theme), legend hidden which is fine since
+          colors don't add information
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with kernel density estimation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows quartile markers (25%, 50%, 75%), mirrored density, median
+          line present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden since fill colors match x-axis categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-basic · plotnine · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows different distributions (different means, spreads), but all
+          are normal distributions - could show some skewed or bimodal data
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Generic groups are plausible but not tied to a real scenario like
+          the spec suggests (salary, test scores, etc.)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values in reasonable range (25-125), 200 points per category is appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar with geom_violin and draw_quantiles parameter,
+          theme_minimal, proper plotnine idioms. Could leverage more advanced features
+          like faceting or custom stats.
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/pygal.yaml b/plots/violin-basic/metadata/pygal.yaml
index 7277dc8311..729a4bbb45 100644
--- a/plots/violin-basic/metadata/pygal.yaml
+++ b/plots/violin-basic/metadata/pygal.yaml
@@ -26,3 +26,173 @@ review:
   - Quartile boxes inside violins are quite small and subtle, could be more prominent
   - Performance scores exceeding 100 seems unusual for a score context (typically
     0-100)
+  image_description: 'The plot displays four violin shapes representing performance
+    score distributions for Engineering, Marketing, Sales, and Operations departments.
+    Each violin is rendered in a distinct color: blue (Engineering), coral/orange
+    (Marketing), dark gray (Sales), and olive/gold (Operations). The violins show
+    mirrored kernel density estimates with small rectangular quartile boxes inside
+    and horizontal median lines. The title "violin-basic · pygal · pyplots.ai" appears
+    at the top. X-axis shows "Category" with department labels, Y-axis shows "Performance
+    Score" ranging from 20 to 130. A legend at the bottom repeats the category names
+    with color indicators.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are clearly readable at full size, well-scaled for
+          4800x2700
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins are well-sized and visible; quartile boxes could be slightly
+          larger for better visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that are colorblind-friendly (no red-green conflict)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, proper spacing between violins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("Performance Score", "Category") but lack
+          units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend is redundant (same info as x-axis labels), grid is subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot implementation using KDE
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has mirrored density, quartile markers, median lines as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (20-130) shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels match category names
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "violin-basic · pygal · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distributions with varying means and spreads; Sales
+          has widest spread showing variance well
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Performance scores by department is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores in 40-130 range are reasonable but some values exceed 100
+          which is unusual for "scores"
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save (no functions/classes)'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported, all used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Creative workaround using XY chart since pygal lacks native violin
+          support; custom KDE implementation shows ingenuity but doesn't leverage
+          pygal-specific features
+  verdict: APPROVED
diff --git a/plots/violin-basic/metadata/seaborn.yaml b/plots/violin-basic/metadata/seaborn.yaml
index 8e3a330100..2b2aa5c697 100644
--- a/plots/violin-basic/metadata/seaborn.yaml
+++ b/plots/violin-basic/metadata/seaborn.yaml
@@ -25,3 +25,174 @@ review:
   - Could use inner="quartile" to show individual quartile lines more distinctly,
     or add miniature box plots with inner="box"
   - Missing y-axis grid extension through violins (grid only on axis area)
+  image_description: The plot displays four violin plots comparing salary distributions
+    across four departments (Engineering, Marketing, Sales, Support). The violins
+    alternate in color between a muted blue (#306998) and golden yellow (#FFD43B).
+    Each violin shows the kernel density estimation with mirrored sides, clearly revealing
+    distribution shapes. Inside each violin, dotted lines indicate quartiles (Q1,
+    Q3) and a dashed line shows the median. The y-axis displays "Salary ($)" with
+    currency formatting ranging from $20k to $120k. The x-axis shows "Department"
+    with category labels. The title correctly follows the format "violin-basic · seaborn
+    · pyplots.ai". The Sales department violin clearly shows a bimodal distribution
+    (two peaks around $55k and $90k), demonstrating the ability to reveal complex
+    distribution shapes. A subtle horizontal grid with alpha=0.3 aids readability.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, labels at 20pt, ticks at 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all elements clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins well-sized, quartile lines clearly visible inside
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue/gold palette is colorblind-safe (not red-green), good contrast,
+          though alternating could be clearer
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Well-proportioned layout with good use of space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Salary ($)" and "Department"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but y-axis only; no legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows quartiles (inner="quart"), median line, mirrored density on
+          both sides
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range from ~$20k to ~$130k
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed/shown (categories on x-axis), legend=False is appropriate
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "violin-basic · seaborn · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows different distribution shapes: normal (Engineering, Marketing,
+          Support), bimodal (Sales) - demonstrates the key violin feature'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Salary by department is plausible; specific values are reasonable
+          though somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary ranges ($30k-$130k) are realistic for the context
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Flat structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's violinplot with inner="quart", correct hue/palette
+          usage, but doesn't showcase more advanced features like split violins or
+          cut parameter
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/altair.yaml b/plots/violin-box/metadata/altair.yaml
index 201a4309b3..b5d5c02954 100644
--- a/plots/violin-box/metadata/altair.yaml
+++ b/plots/violin-box/metadata/altair.yaml
@@ -25,3 +25,183 @@ review:
     visibility
   - Legend placement in top-right leaves some empty space; could be integrated more
     tightly
+  image_description: 'The plot displays four violin plots with embedded box plots
+    showing server response times across four tiers (Basic, Standard, Premium, Enterprise).
+    Each violin is distinctly colored: Basic in blue, Standard in yellow, Premium
+    in light blue, and Enterprise in green. The violins show smooth KDE distributions
+    with dark gray box plots centered inside, displaying median (white horizontal
+    line), quartiles (gray box), and whiskers extending to min/max values. The title
+    "violin-box · altair · pyplots.ai" appears centered at the top in large font.
+    The Y-axis is labeled "Response Time (ms)" ranging from 0-650ms. Category labels
+    appear below each violin. A color legend for "Server Tier" is positioned in the
+    top right corner.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable. Font sizes
+          are appropriate for the canvas size.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels are clearly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins and box plots are clearly visible. Box plots could be slightly
+          more prominent but are adequately visible.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are colorblind-safe with good contrast. Blue, yellow, light
+          blue, and green are distinguishable.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with faceted layout. Plot elements are well-proportioned.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Response Time (ms)". X-axis
+          uses category names (no explicit axis label needed for faceted categorical).
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3). Legend is placed well but could be closer
+          to the data.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct: violin plot with embedded box plot.'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numeric variable (Response Time) on Y-axis, categorical (Server Tier)
+          creates facets.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows KDE distribution (violin), median, quartiles (box), and whiskers
+          as specified.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data is visible; axes show full range.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all four server tiers.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-box · altair · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Excellent variety: Basic shows right-skewed exponential distribution,
+          Standard shows normal distribution, Premium shows tight normal, Enterprise
+          shows bimodal distribution. Shows different distribution shapes well.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response times is a real, neutral business/technology scenario.
+          The progression from Basic (high latency) to Enterprise (low latency with
+          caching) is realistic.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times in ms are realistic (25-600ms range). Values are plausible
+          for server tiers.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → violin layer → box layer
+          → combined chart → save.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html (correct for Altair).
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Uses Altair''s declarative grammar well: `transform_density` for
+          KDE, `mark_area` with `orient="horizontal"` and `stack="center"` for violin
+          shape, `mark_boxplot` with customization, layering with `alt.layer`, and
+          faceting with `alt.Column`. Good use of Altair''s compositional approach.'
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/bokeh.yaml b/plots/violin-box/metadata/bokeh.yaml
index ec3a1b3cdf..e6bdf6a6a2 100644
--- a/plots/violin-box/metadata/bokeh.yaml
+++ b/plots/violin-box/metadata/bokeh.yaml
@@ -27,3 +27,175 @@ review:
     the main plot
   - Outlier markers could be slightly larger for better visibility at full resolution
   - Grid/legend positioning could be improved by placing legend inside the plot area
+  image_description: The plot displays four violin plots for study methods A, B, C,
+    and D on a white background. Each violin is rendered in a blue color (#306998,
+    Python blue) with KDE-based distribution shapes clearly visible. Inside each violin
+    is a yellow/golden box plot (#FFD43B) showing the IQR (Q1-Q3), with a black median
+    line. Whiskers extend from the boxes to the whisker limits, with small caps. White
+    circular outliers with black borders are displayed for Methods A, B, C, and D.
+    The title "violin-box · bokeh · pyplots.ai" appears at the top left. The x-axis
+    is labeled "Study Method" and the y-axis is labeled "Test Score" ranging from
+    0 to 120. A legend on the right side shows "Distribution (KDE)", "IQR (Q1-Q3)",
+    "Median", and "Outliers". Method D shows a bimodal distribution with a lower secondary
+    peak. The grid is subtle (dashed, low alpha) on the y-axis only.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all excellently
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins, boxes, and outliers all properly sized and visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions, but legend placed outside plot area creates some
+          imbalance
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels "Study Method" and "Test Score"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is placed to the right outside plot area which is acceptable,
+          but the y-axis grid extends above 100 (to 120) while data is all below 110
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with embedded box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical x-axis, numeric y-axis correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Violin (KDE), box (IQR), median line, whiskers, and outliers all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly describes all components
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-box · bokeh · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows multiple distribution shapes (normal, bimodal in Method D),
+          different spreads, and outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Educational test scores by study method is neutral and comprehensible
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Test scores 0-100 range is realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Custom KDE implementation rather than using Bokeh's built-in capabilities;
+          does not leverage ColumnDataSource as effectively as possible for the violin
+          shapes
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/highcharts.yaml b/plots/violin-box/metadata/highcharts.yaml
index 59ecc491f4..bbe674bf9b 100644
--- a/plots/violin-box/metadata/highcharts.yaml
+++ b/plots/violin-box/metadata/highcharts.yaml
@@ -23,3 +23,176 @@ review:
   weaknesses:
   - No grid lines visible, which would improve value readability
   - Some unused whitespace at top of plot area
+  image_description: The plot displays four violin shapes (representing KDE distributions)
+    for Control (blue), Treatment A (yellow), Treatment B (purple), and Treatment
+    C (cyan). Each violin has an embedded black box plot showing median (white circle),
+    quartiles (rectangular box), and whiskers (vertical lines with caps). Treatment
+    C shows red diamond outliers above the main distribution. The title "violin-box
+    · highcharts · pyplots.ai" appears at the top. The Y-axis is labeled "Response
+    Value (units)" and shows values from ~14 to ~110. The X-axis is labeled "Group"
+    with the four category names. A legend at the bottom shows all series including
+    Median and Outliers.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: large title, readable axis labels
+          and tick marks'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are distinct
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes well-sized, box plots clearly visible, outliers marked
+          distinctly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette used (blue, yellow, purple, cyan - no red-green
+          conflict)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good proportions but some wasted space at the top of the plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Response Value (units)", X-axis has clear "Group"
+          label
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines; legend is present and well-placed but grid
+          would help readability
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with embedded box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical groups on X, numeric values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All features present: violin KDE shape, box with quartiles, median
+          marker, whiskers, outliers'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "violin-box · highcharts · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: normal (Control), bimodal (Treatment A), narrow
+          (Treatment B), skewed with outliers (Treatment C)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Plausible treatment comparison scenario but generic (not a specific
+          real-world example)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response values in sensible 15-105 range
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script structure, no classes/functions
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: 'Unused import: pathlib Path only used for cleanup, scipy stats imported
+          correctly'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current highcharts-core API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of PolygonSeries for violins, ScatterSeries for markers,
+          proper use of highcharts-more.js
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/letsplot.yaml b/plots/violin-box/metadata/letsplot.yaml
index 11f18d7bb8..d963fb7ec3 100644
--- a/plots/violin-box/metadata/letsplot.yaml
+++ b/plots/violin-box/metadata/letsplot.yaml
@@ -26,3 +26,174 @@ review:
     keeping a legend could add clarity
   - Could leverage lets-plot interactive tooltip features for the HTML export
   - Bimodal distribution in Premium group is somewhat subtle, could be more pronounced
+  image_description: 'The plot displays a violin plot with embedded box plots showing
+    server response times (ms) across four server configurations: Standard, Optimized,
+    Premium, and Enterprise. Each violin is colored distinctly - Standard is blue
+    (#306998), Optimized is yellow (#FFD43B), Premium is teal (#4A90A4), and Enterprise
+    is green (#7CB342). White box plots are centered inside each violin showing median,
+    quartiles, and whiskers. Outliers are displayed as hollow circles. The title "violin-box
+    · letsplot · pyplots.ai" appears at the top. Y-axis shows "Response Time (ms)"
+    ranging from 0-140, X-axis shows "Server Configuration". The plot uses a minimal
+    theme with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and ~24pt, axis labels ~20pt, tick labels ~16pt, all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Violins and box plots clearly visible, outliers shown as hollow
+          circles. Minor deduction: box plots could be slightly wider for better visibility'
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that are colorblind-safe (blue, yellow, teal,
+          green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Response Time (ms)" with units, "Server Configuration" descriptive'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), but legend is hidden when colors are mapped
+          to categories - this is acceptable since x-axis labels serve as legend
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin + box plot combination
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has violin (KDE), box plot (median, quartiles, whiskers), and outliers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data from 0-140ms
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: X-axis labels serve as categorical identifier
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "violin-box · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows different distribution shapes: unimodal (Standard, Enterprise),
+          bimodal (visible in Premium), with outliers. Minor: the bimodality in Premium
+          could be more pronounced'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response times is a real, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Response times 20-130ms are realistic, though range is slightly wide
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, lets_plot used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html, but uses path="." which is correct
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2-style grammar with geom_violin and geom_boxplot layering,
+          theme_minimal, scale_fill_manual. Could leverage more lets-plot specific
+          features like tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/matplotlib.yaml b/plots/violin-box/metadata/matplotlib.yaml
index 1152f9d067..9984d9cb9d 100644
--- a/plots/violin-box/metadata/matplotlib.yaml
+++ b/plots/violin-box/metadata/matplotlib.yaml
@@ -25,3 +25,172 @@ review:
     box = summary statistics)
   - Could use matplotlib's more advanced violin customization options like showing
     quantiles directly in the violin
+  image_description: 'The plot shows a violin plot with embedded box plots for 4 experimental
+    groups (Control, Treatment A, Treatment B, Treatment C). The violins are rendered
+    in Python blue (#306998) with 70% opacity, showing KDE density shapes. Inside
+    each violin is a narrow yellow (#FFD43B) box plot showing median (red line), quartiles
+    (box), and whiskers. The plot has a subtle dashed y-axis grid with alpha 0.3.
+    The title reads "violin-box · matplotlib · pyplots.ai" in large font at the top.
+    X-axis is labeled "Experimental Group" and Y-axis "Response Value (units)". All
+    four groups show distinctly different distributions: Control is symmetric/normal,
+    Treatment A shows bimodal distribution, Treatment B shows right-skewed distribution
+    with upper outliers, and Treatment C shows wide spread with both upper and lower
+    outliers visible as blue dots.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, labels 20pt, ticks 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins and box plots well-sized, good alpha for visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow/red color scheme is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive with units: "Response Value (units)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle and well-done, but no legend present to explain colors
+          (violins vs boxes)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Correct chart type: violin plot with embedded box plot'
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical groups on X, numeric values on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features: violin (KDE), box (median, quartiles, whiskers),
+          outliers as points'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis 0-110 shows all data including outliers
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A - no legend needed for this visualization type
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-box · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent: symmetric (Control), bimodal (Treatment A), skewed (Treatment
+          B), wide with outliers (Treatment C)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Neutral scientific scenario (experimental groups with response values)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic values 0-100 range for response measurements
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses tick_labels not deprecated labels parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses violinplot and boxplot correctly but overlaying them manually
+          is a workaround rather than using a more distinctive feature
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/plotly.yaml b/plots/violin-box/metadata/plotly.yaml
index 78acfa36b3..f19a5105f3 100644
--- a/plots/violin-box/metadata/plotly.yaml
+++ b/plots/violin-box/metadata/plotly.yaml
@@ -25,3 +25,180 @@ review:
   - Outlier markers are small (size 8); increasing to 10-12 would improve visibility
   - Yellow color for Interactive group may have reduced contrast for some colorblind
     users
+  image_description: 'The plot displays four violin plots with embedded box plots,
+    comparing test scores across four teaching methods: Traditional (blue), Interactive
+    (yellow), Online (light blue), and Hybrid (pink). Each violin shows the kernel
+    density estimate (KDE) of the distribution with a white box plot embedded inside
+    showing median, quartiles, and whiskers. The title "violin-box · plotly · pyplots.ai"
+    is centered at the top in black text. The y-axis shows "Test Score (points)" ranging
+    from 0 to ~105, and the x-axis shows "Teaching Method" with the four category
+    labels. The background is clean white with subtle horizontal gridlines. Outliers
+    are visible as small dots (one below Traditional around 30, one below Hybrid around
+    27). The Online group shows a bimodal distribution shape. Mean lines (red) are
+    visible within each violin.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          output resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins well-sized, box plots visible inside, outliers shown as points;
+          minor deduction as outlier markers could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good color differentiation, though yellow and light blue could be
+          challenging for some colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Test Score (points)", X-axis descriptive "Teaching
+          Method"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (acceptable since categories are on x-axis), but
+          grid could be more subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with embedded box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Numeric values on Y, categorical groups on X
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows KDE (violin shape), median, quartiles (box), whiskers, and
+          outliers as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range 0-105 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; categories clearly labeled on x-axis
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "violin-box · plotly · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Excellent variety: normal distributions, bimodal (Online), different
+          spreads, outliers present; minor deduction as more distinct outliers could
+          strengthen demonstration'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Education/teaching methods comparison is neutral, realistic, and
+          relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Test scores 0-100 are realistic; some clipping at boundaries may
+          slightly affect distribution shapes
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Violin with box_visible, meanline_visible, and points options;
+          saves interactive HTML; could leverage more Plotly-specific features like
+          hover customization
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/plotnine.yaml b/plots/violin-box/metadata/plotnine.yaml
index adeacd2468..589fe1c208 100644
--- a/plots/violin-box/metadata/plotnine.yaml
+++ b/plots/violin-box/metadata/plotnine.yaml
@@ -24,3 +24,172 @@ review:
   - Does not showcase plotnine-specific features beyond basic geoms (could use faceting,
     stat_summary, or position adjustments)
   - Axis labels lack units (e.g., Score (points) or Score (0-100))
+  image_description: The plot displays a violin plot with embedded box plots for four
+    product categories (A, B, C, D). Colors used are Python Blue (#306998) for Product
+    A, Yellow (#FFD43B) for Product B, Light Blue (#6A9BC9) for Product C, and Muted
+    Gold (#D4A84B) for Product D. Each violin shows the distribution shape with white
+    box plots centered inside showing median (horizontal line), quartiles (box), and
+    whiskers. Product B shows outlier points above the upper whisker. The title "violin-box
+    · plotnine · pyplots.ai" is at the top, x-axis is labeled "Product Category",
+    and y-axis is labeled "Customer Satisfaction Score" with values ranging from ~30
+    to ~85. The plot has a minimal theme with subtle grid lines.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable at the
+          target resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins and box plots are well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that are colorblind-friendly (blue, yellow,
+          light blue, gold)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas space with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (satisfaction scores could use "points"
+          or similar)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle, legend correctly removed since x-axis shows categories
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with embedded box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Groups on x-axis, values on y-axis as expected
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows KDE shape, median, quartiles, whiskers, and outliers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately hidden (categories on x-axis)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses `violin-box · plotnine · pyplots.ai`
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: normal (A), right-skewed with outliers (B), bimodal
+          (C), uniform (D)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Customer satisfaction scores for products is a neutral, realistic
+          business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores in 30-85 range are realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as `plot.png`
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses plotnine's grammar of graphics correctly but doesn't showcase
+          unique features like `stat_*` functions or faceting
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/pygal.yaml b/plots/violin-box/metadata/pygal.yaml
index 124714e344..c840975d58 100644
--- a/plots/violin-box/metadata/pygal.yaml
+++ b/plots/violin-box/metadata/pygal.yaml
@@ -24,3 +24,185 @@ review:
   - Y-axis range extends to 140 despite data being clipped to 0-100, wasting vertical
     space
   - Legend mixes category names with box plot element names, creating visual complexity
+  image_description: 'The plot displays four violin plots representing Performance
+    Scores (0-100 scale) across four departments: Engineering (blue, #306998), Marketing
+    (yellow, #FFD43B), Sales (green, #4CAF50), and Operations (orange, #FF5722). Each
+    violin shows the KDE distribution shape with an embedded white box plot showing
+    the IQR (Q1-Q3). Gray horizontal lines indicate medians, and vertical whiskers
+    extend to the data bounds. Black dots represent outliers (visible for Engineering
+    at ~53, ~115, ~118 and Marketing at ~14, ~143). The title "violin-box · pygal
+    · pyplots.ai" is at the top. X-axis shows "Department" with labels, Y-axis shows
+    "Performance Score (0-100 scale)" ranging from 0 to 140. A legend appears on the
+    left side listing Engineering, IQR Box, Whiskers, Median Line, Outliers, Marketing,
+    Sales, and Operations with corresponding color markers.'
+  criteria_checklist:
+    visual_quality:
+      score: 34
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are clearly readable at full resolution.
+          Font sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. X-axis labels are well-spaced, legend
+          doesn't overlap data.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins, boxes, whiskers, medians, and outliers are all clearly visible.
+          Outlier dots are sized appropriately (18px).
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue, yellow, green, orange palette is colorblind-friendly with good
+          contrast.
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good central placement, but the legend on the left side is somewhat
+          cluttered with many entries. Plot fills canvas adequately.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Performance Score (0-100
+          scale)", X-axis labeled "Department".
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is cluttered with too many entries (some entries labeled "None"
+          may appear). The legend organization could be cleaner.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with embedded box plot implementation.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical groups on X-axis, numeric values on Y-axis correctly
+          mapped.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All required features present: KDE violin shape, box showing Q1-Q3,
+          whiskers, median line, outliers shown as points.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range (0-140) appropriately shows all data including outliers.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend shows multiple entries per category due to the series-based
+          approach. Some entries appear redundant.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "violin-box · pygal · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes across departments (Engineering
+          tight, Sales wide), includes outliers demonstrating feature coverage. Operations
+          shows narrower distribution.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: '"Performance scores by department" is a realistic, neutral business
+          scenario.'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores mostly in 0-100 range as expected, though some outliers exceed
+          100 and the Y-axis range extends to 140.
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Code follows imports → data → plot → save structure. Uses a loop
+          but no functions/classes.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for deterministic data.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pygal, and Style are imported; all are used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal's XY chart with custom Style, fill and stroke options,
+          legend positioning. Manual KDE implementation is necessary but not a library
+          distinctive feature.
+  verdict: APPROVED
diff --git a/plots/violin-box/metadata/seaborn.yaml b/plots/violin-box/metadata/seaborn.yaml
index 0cea786e7d..b6f4bb0423 100644
--- a/plots/violin-box/metadata/seaborn.yaml
+++ b/plots/violin-box/metadata/seaborn.yaml
@@ -23,3 +23,174 @@ review:
   - Grid only on y-axis, though acceptable for categorical x-axis
   - Legend disabled which is fine since batch names are on x-axis, but a small legend
     could enhance standalone viewing
+  image_description: 'The plot displays four violin plots with embedded box plots
+    comparing Quality Scores (0-100) across four manufacturing batches (A, B, C, D).
+    Each violin uses a distinct color: Batch A in steel blue (#306998), Batch B in
+    yellow (#FFD43B), Batch C in teal (#4A90A4), and Batch D in orange (#E8A838).
+    The violin shapes clearly show different distribution characteristics: Batch A
+    shows a symmetric normal distribution centered around 75, Batch B exhibits a bimodal
+    distribution with two peaks (around 60 and 80), Batch C shows a right-skewed distribution
+    with concentration at lower values, and Batch D displays a left-skewed distribution
+    concentrated at higher values. Each violin contains a visible black box plot with
+    white median markers, quartile boxes, and whiskers. The title "violin-box · seaborn
+    · pyplots.ai" is displayed prominently at the top. X-axis is labeled "Manufacturing
+    Batch" and Y-axis shows "Quality Score (0-100)" with a subtle dashed grid on the
+    y-axis.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, labels 20pt, ticks 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, clean spacing between violins
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violins are well-sized, box plots clearly visible inside
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that are colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "(0-100)", X-axis descriptive "Manufacturing Batch"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid subtle (alpha 0.3) which is good, but no legend present (though
+          not strictly needed as colors match x-axis labels)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct violin plot with embedded box plot
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categorical on X, numeric on Y
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: KDE shape visible, median/quartiles/whiskers shown via box
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range (30-100)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed, colors match x-axis batch labels
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-box · seaborn · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Excellent variety: normal (A), bimodal (B), right-skewed (C), left-skewed
+          (D)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Manufacturing quality scores is a realistic, neutral business scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Quality scores 0-100 are realistic for manufacturing QC
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib, numpy, pandas, seaborn)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses modern seaborn API with hue parameter correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with proper dpi
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of seaborn's violinplot with inner="box" parameter,
+          proper hue handling to avoid deprecation warnings, saturation parameter
+          for color intensity
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/altair.yaml b/plots/violin-split/metadata/altair.yaml
index 3b102288b2..8f358e951c 100644
--- a/plots/violin-split/metadata/altair.yaml
+++ b/plots/violin-split/metadata/altair.yaml
@@ -23,3 +23,175 @@ review:
   - Grid lines appear at default opacity rather than subtle (alpha 0.2-0.4)
   - The IQR and median markers use xOffset which places them slightly offset from
     violin center rather than overlaid on the violin halves
+  image_description: 'The plot displays a split violin visualization across 4 faceted
+    panels (Engineering, HR, Marketing, Sales). Each panel shows two distributions
+    side-by-side within split violins: Control group (blue, #306998) on the left half
+    and Treatment group (yellow, #FFD43B) on the right half. The violins meet at the
+    center line and include inner IQR rules (dark vertical lines) and white diamond
+    median markers. The title "violin-split · altair · pyplots.ai" appears at the
+    top center. A legend on the right indicates the two groups. Y-axis shows "Score
+    (%)" ranging from 30-100, and each department is labeled above its respective
+    panel.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title 28pt, axis labels 16-20pt, all clearly readable. Slightly smaller
+          than ideal for tick labels.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes well-defined, median markers and IQR lines clearly
+          visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow contrast is colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas via faceting, though some whitespace at bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Score (%)" includes units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is visible but no alpha applied (appears at default opacity),
+          legend well-placed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with two halves per category
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category on facet columns, value on Y, split group determines left/right
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Split violins, distinct colors, legend, inner quartile markers all
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 30-100 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Control and Treatment
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "violin-split · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes and shifts between groups, Engineering
+          shows largest treatment effect
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Test scores by department comparing control vs treatment is a plausible
+          A/B testing scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 30-100% reasonable, though some distributions compressed at
+          upper bound
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves both plot.png and plot.html (correct)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses transform_density, transform_calculate, faceting, layering -
+          good Altair patterns but xOffset approach for IQR/median markers is somewhat
+          complex
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/bokeh.yaml b/plots/violin-split/metadata/bokeh.yaml
index 9a8a1f66c0..9a34ccbddf 100644
--- a/plots/violin-split/metadata/bokeh.yaml
+++ b/plots/violin-split/metadata/bokeh.yaml
@@ -28,3 +28,178 @@ review:
     resolution
   - Bokeh toolbar icons visible on right edge slightly detract from clean presentation
   - Legend glyph boxes are relatively small compared to the large canvas size
+  image_description: 'The plot displays four split violin plots arranged horizontally
+    across departments (Engineering, Marketing, Sales, Support). Each violin is split
+    into two halves: the left side (Python Blue/steel blue, #306998) represents "Before
+    Training" and the right side (Python Yellow/gold, #FFD43B) represents "After Training".
+    The y-axis shows "Satisfaction Score (1-10)" ranging from about 1 to 10. Each
+    violin half contains white horizontal lines indicating quartile markers - solid
+    line for median, dashed lines for Q1 and Q3. A legend in the top-left corner identifies
+    the two groups. The title "violin-split · bokeh · pyplots.ai" appears centered
+    at the top. The background is a light gray (#fafafa) with subtle dashed horizontal
+    grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18-20pt - all
+          clearly readable, though the y-axis label is slightly smaller
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels well-separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes are well-sized and clearly visible, quartile markers
+          distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow provide excellent contrast and are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, violins well-distributed, minor right-side tools
+          visible
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score (1-10)" has implicit units but "Department"
+          lacks context'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (dashed, alpha 0.3), legend well-placed but glyph
+          boxes could be more visible
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with left/right halves for different groups
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, split groups correctly mapped
+          to halves
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Split halves meet at center, legend present, quartile markers included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 1-10 scale, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Before/After Training
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Exactly matches required format: violin-split · bokeh · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes across departments, improvement
+          patterns visible, but distributions somewhat similar
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction before/after training is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 1-10 satisfaction scale is realistic; some distributions extend to
+          boundaries
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 2
+        max: 3
+        passed: true
+        comment: Has a helper function (gaussian_kde_numpy) which is reasonable for
+          complexity but deviates from pure KISS
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses HoverTool with custom tooltips, Legend customization, but KDE
+          computed manually rather than using Bokeh's built-in capabilities
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/highcharts.yaml b/plots/violin-split/metadata/highcharts.yaml
index c972ff0c0a..5e87fe2d50 100644
--- a/plots/violin-split/metadata/highcharts.yaml
+++ b/plots/violin-split/metadata/highcharts.yaml
@@ -25,3 +25,181 @@ review:
   - Missing subtle grid lines on the satisfaction score axis would improve readability
   - Image height is 2561px instead of the expected 2700px (aspect ratio slightly off)
   - Median markers could be slightly more prominent for better visibility
+  image_description: 'The plot displays a split violin chart with 4 departments (Engineering,
+    Marketing, Sales, HR) on the x-axis and Satisfaction Score (1-10) on the y-axis.
+    Each violin is split into two halves: blue (Remote workers) on the left and yellow
+    (Office workers) on the right. The distributions meet at the center line, showing
+    the density of satisfaction scores for each group. White diamond markers indicate
+    the median for each distribution. The title follows the correct format "violin-split
+    · highcharts · pyplots.ai" with a subtitle "Employee Satisfaction Scores: Remote
+    vs Office Workers". A legend in the top-right corner identifies Remote (blue)
+    and Office (yellow). The plot shows clear differences between groups - for example,
+    Engineering Remote workers have higher satisfaction than Office workers, while
+    Sales shows the opposite pattern.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick marks, and legend all clearly readable at
+          full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violin shapes clearly visible with good alpha transparency (0.75),
+          median markers visible but could be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are colorblind-safe, no red-green
+          conflict
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills appropriate area with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Satisfaction Score" and "Department" are descriptive (score is
+          unitless by nature)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: 'No visible grid lines on the plot area (gridLineWidth: 0 on y-axis)'
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with left/right halves for different groups
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values on y-axis, split by group correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors, legend, halves meet at center, includes median
+          markers
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 1-10 range, all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies Remote and Office groups
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "violin-split · highcharts · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions: Engineering favors Remote, Sales favors
+          Office, Marketing/HR relatively similar - demonstrates all aspects of split
+          violins'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction by work arrangement is a highly relevant real-world
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores 1-10 with realistic means (6-8 range) and standard
+          deviations
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: Imports webdriver Options but naming shadows built-in, though all
+          imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Note: Image dimensions are 4800x2561 instead of expected 4800x2700'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses areasplinerange for smooth violin shapes, inverted chart for
+          horizontal layout, custom KDE computation. Could leverage more Highcharts-specific
+          features like animations or better tooltip formatting.
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/letsplot.yaml b/plots/violin-split/metadata/letsplot.yaml
index 2a9dfc0f89..6ac50fe3da 100644
--- a/plots/violin-split/metadata/letsplot.yaml
+++ b/plots/violin-split/metadata/letsplot.yaml
@@ -25,3 +25,175 @@ review:
   weaknesses:
   - Grid lines appear slightly more prominent than ideal despite alpha=0.3 setting
   - Star import with many noqa comments reduces code clarity
+  image_description: 'The plot displays a split violin visualization comparing employee
+    satisfaction scores before and after office redesign across four departments:
+    Engineering, Marketing, Sales, and Design. Each violin is split with yellow (Python
+    Yellow #FFD43B) on the left representing "Before" and blue (Python Blue #306998)
+    on the right representing "After". Inner boxplots are embedded within each violin
+    half showing quartiles and median lines, with a few outlier points visible. The
+    y-axis spans from 15 to 105 (satisfaction score 0-100), x-axis shows department
+    names, and a legend on the right indicates the Period (Before/After). All text
+    is clearly legible with appropriately sized fonts.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt bold, axis titles 20pt, tick labels 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins well-sized, boxplots visible, minor overlap of boxplots with
+          violin center
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe, high contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with range "(0-100)", X-axis labeled
+          "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid alpha is 0.3 which is good, but grid lines still appear too
+          prominent/dark
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with left/right halves for different groups
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, split by Period
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Distinct colors, halves meet at center, inner boxplots included,
+          alpha transparency applied
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full data range 15-105
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Period with Before/After
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "violin-split · letsplot · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied distributions: different means, different spreads,
+          different improvements across departments'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction before/after office redesign is a real, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Satisfaction scores 0-100 with realistic distributions, sensible
+          parameters
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: Uses `from lets_plot import *` with noqa comments throughout
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: false
+        comment: Uses show_half parameter for split violins and layer_tooltips for
+          interactivity, but tooltips not fully leveraged in static output
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/matplotlib.yaml b/plots/violin-split/metadata/matplotlib.yaml
index d5e0efbd19..9343750a51 100644
--- a/plots/violin-split/metadata/matplotlib.yaml
+++ b/plots/violin-split/metadata/matplotlib.yaml
@@ -26,3 +26,177 @@ review:
     half within each violin
   - 'Implementation header still shows Quality: pending instead of actual score (minor
     metadata issue)'
+  image_description: 'The plot displays four split violin plots comparing salary distributions
+    between Male (blue, #306998) and Female (yellow, #FFD43B) across Engineering,
+    Marketing, Sales, and HR departments. Each violin is properly split with the left
+    half (Male) and right half (Female) meeting at the center line. The distributions
+    show realistic salary patterns with Engineering having the highest salaries (~$40K-$180K
+    range), while HR has the lowest (~$30K-$100K range). White horizontal lines with
+    dark edges mark the median and quartiles (Q1, Q3) on each half. A legend with
+    colored patches appears in the upper left. The title correctly shows "violin-split
+    · matplotlib · pyplots.ai", y-axis is labeled "Annual Salary ($)" with $K formatting,
+    and x-axis shows "Department" with the four category names.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, tick labels 16pt - all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, legend positioned to avoid data
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins clearly visible with good alpha=0.8, quartile markers visible
+          but could use slightly more contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/yellow is colorblind-safe (not red-green), good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend near plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Annual Salary ($)", X-axis has "Department"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid subtle with alpha=0.3 (good), but legend lacks clear explanation
+          that left=Male, right=Female in the violin context
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y, split_group correctly mapped to halves
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Split violins with distinct colors, legend, quartile markers as suggested
+          in notes
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Male/Female labels match the colors correctly
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "violin-split · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes per department, different spreads,
+          salary differences between groups - good variation but distributions could
+          show more distinct shapes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary comparison by gender across departments is a real, meaningful
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary ranges $30K-$180K are realistic for corporate departments
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set correctly
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot, numpy, and Patch (all used)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as ''plot.png'' which is correct, but header says "Quality:
+          pending" instead of actual score'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of violinplot with manual path clipping to create split
+          effect, uses Patch for legend, hlines for quartiles. The split violin technique
+          shows matplotlib expertise, but could leverage ax.vlines for additional
+          visual elements or fill_between for more sophisticated styling.
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/plotly.yaml b/plots/violin-split/metadata/plotly.yaml
index 9e432278b9..41e683d15d 100644
--- a/plots/violin-split/metadata/plotly.yaml
+++ b/plots/violin-split/metadata/plotly.yaml
@@ -23,3 +23,171 @@ review:
   - Grid lines could be slightly more visible (alpha 0.1 is quite faint)
   - Distribution shapes are all unimodal normal; could benefit from more variety to
     showcase violin plot capabilities
+  image_description: 'The plot shows a split violin chart with four departments (Engineering,
+    Marketing, Sales, Operations) on the x-axis and Performance Score (0-100 points)
+    on the y-axis. Each violin is split into two halves: blue ("Before Training")
+    on the left and yellow/gold ("After Training") on the right. The violins clearly
+    show distribution shapes with dashed mean lines visible in each half. The title
+    "violin-split · plotly · pyplots.ai" is centered at the top, with a horizontal
+    legend below it showing the two periods. The background is white with subtle horizontal
+    grid lines. All four departments show the "After Training" distributions shifted
+    higher than "Before Training", demonstrating the training program''s effectiveness.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, tick labels, and legend are all clearly readable
+          at full resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; department labels and legend are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes are clearly visible with appropriate width and opacity
+          (0.7)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) provide excellent contrast and
+          are colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins; legend positioned appropriately
+          above plot
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has descriptive label with units "Performance Score (0-100
+          points)"; X-axis labeled "Department"
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with left/right halves representing different
+          groups
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis, split by period
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has distinct colors, legend, halves meet at center, meanline markers
+          included
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis range 0-100 shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "Before Training" and "After Training"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows exact format: "violin-split · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes and spreads across departments;
+          slight deduction as distributions could show more variety (e.g., bimodal)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee performance scores before/after training is a realistic,
+          comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Scores 0-100 are realistic; minor deduction as some pre-training
+          distributions seem slightly high
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy, pandas, and plotly.graph_objects imported and used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Leverages Plotly's go.Violin with side parameter, meanline_visible,
+          scalemode, and violinmode overlay; also outputs interactive HTML
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/plotnine.yaml b/plots/violin-split/metadata/plotnine.yaml
index 1c625cdd75..7198089553 100644
--- a/plots/violin-split/metadata/plotnine.yaml
+++ b/plots/violin-split/metadata/plotnine.yaml
@@ -25,3 +25,176 @@ review:
   - Boxplots are slightly narrow and could be more prominent
   - Distribution shapes are somewhat similar across departments - more varied shapes
     would better demonstrate the plot type capabilities
+  image_description: The plot displays split violin plots for four departments (Engineering,
+    Marketing, Sales, Support) comparing employee satisfaction scores before and after
+    training. Blue violin halves on the left represent "After Training" scores, while
+    yellow/gold violin halves on the right represent "Before Training" scores. Each
+    violin contains an inner boxplot showing quartiles and outliers (small gray dots).
+    The title "violin-split · plotnine · pyplots.ai" is centered at the top. The y-axis
+    shows "Satisfaction Score (0-100)" ranging from about 25 to 100, and the x-axis
+    shows "Department". A legend labeled "Period" appears on the right with "After
+    Training" and "Before Training" entries. The grid lines are subtle and dashed.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: 'All text is clearly readable: title ~24pt, axis labels ~20pt, tick
+          labels ~16pt'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Violins are well-sized and visible; boxplots provide good detail
+          (-1 for boxplots being slightly small)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue and yellow are highly distinguishable and colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though slight imbalance with legend area (-1)
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: "Satisfaction Score (0-100)", "Department"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle, but the group order is counter-intuitive ("After
+          Training" is first in the legend and on the left half of each violin, while
+          the "Before Training" group is second in the legend and on the right half)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with left-right halves
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Category on x-axis, values on y-axis, split_group for colors
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Split violins meeting at center, distinct colors, legend, inner boxplots
+          as suggested
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within 0-100 range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies "After Training" and "Before Training"
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "violin-split · plotnine · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation between departments and before/after differences;
+          different distribution shapes visible (-1 for similar spread patterns)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Employee satisfaction training program is a realistic, comprehensible
+          scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Satisfaction scores 0-100 is appropriate; values are realistic (-1
+          for some values clustering near ceiling)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine's grammar of graphics with geom_violin(style="left-right"),
+          geom_boxplot, scale_fill_manual, and theme customization. Could leverage
+          more plotnine-specific features.
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/pygal.yaml b/plots/violin-split/metadata/pygal.yaml
index 00cd0a4904..7e6f51e7f2 100644
--- a/plots/violin-split/metadata/pygal.yaml
+++ b/plots/violin-split/metadata/pygal.yaml
@@ -26,3 +26,181 @@ review:
   - Code contains a helper function (compute_kde) which violates KISS principle -
     should inline the KDE computation or restructure
   - Axis labels could include units (e.g., Recovery Score 0-100)
+  image_description: 'The plot displays four split violin plots for Clinics A through
+    D. Each violin is split into two halves: blue (Before) on the left and yellow
+    (After) on the right. The title reads "violin-split · pygal · pyplots.ai" at the
+    top. The y-axis shows "Recovery Score" ranging from 0-100, and the x-axis shows
+    "Clinic" with four category labels. Each violin half shows a smooth KDE distribution
+    shape with quartile markers (vertical IQR lines and horizontal median markers)
+    visible within each half. The legend at the bottom shows "Before" (blue) and "After"
+    (yellow). The distributions clearly show improvement patterns - After scores are
+    generally higher than Before scores across all clinics. The plot uses a clean
+    white background with subtle horizontal grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick labels are all clearly readable. Font
+          sizes are well-scaled for the 4800x2700 canvas.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. All labels are cleanly separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes are well-sized and clearly visible. Quartile markers
+          are visible within each half.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and yellow provide good contrast and are distinguishable for
+          most colorblind viewers, though not an ideal colorblind-optimized palette.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Good use of canvas space. Plot fills approximately 60% of canvas.
+          Minor issue: some empty space at bottom below legend.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Recovery Score" and "Clinic" are descriptive but lack units.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle horizontal grid lines (dotted), legend well-placed at bottom.
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot with left/right halves representing different
+          groups.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on x-axis, values as distributions, split by group correctly.
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Has distinct colors, legend, and quartile markers. The two halves
+          meet at the center line as specified. Minor: inner markers could be more
+          prominent.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows full 0-100 range, all data visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows "Before" and "After" with matching colors.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "violin-split · pygal · pyplots.ai" but uses
+          monospace font style.
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows different distribution shapes and positions across clinics.
+          Each clinic shows different improvement patterns. Could show more variation
+          in distribution shapes (e.g., bimodal).
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: patient recovery scores before/after
+          treatment across clinics. Very comprehensible.'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Recovery scores 0-100 are realistic, values are clipped appropriately.
+    code_quality:
+      score: 8
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Code uses a helper function `compute_kde` which violates the "no
+          functions" rule, though it's understandable for KDE computation.
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses `np.random.seed(42)`.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pygal, Style).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: No deprecated functions used.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of pygal's XY chart with custom styling, fill/stroke
+          options, and SVG-native output. Creative solution for split violins using
+          polygon shapes.
+  verdict: APPROVED
diff --git a/plots/violin-split/metadata/seaborn.yaml b/plots/violin-split/metadata/seaborn.yaml
index af1e5751ce..c16dd67b10 100644
--- a/plots/violin-split/metadata/seaborn.yaml
+++ b/plots/violin-split/metadata/seaborn.yaml
@@ -23,3 +23,179 @@ review:
   - Distribution shapes are relatively similar across all groups - more variety would
     better showcase split violin comparative power
   - Only horizontal grid lines present
+  image_description: 'The plot displays four split violin plots arranged horizontally,
+    one for each department (Engineering, Marketing, Sales, Finance). Each violin
+    is split vertically down the center, with the left half (dark blue, #306998) representing
+    Male salary distribution and the right half (golden yellow, #FFD43B) representing
+    Female distribution. The violins show smooth kernel density estimates with inner
+    quartile markers (dashed horizontal lines at 25th, 50th, and 75th percentiles).
+    The y-axis shows Annual Salary ($) ranging from ~$20K to ~$160K with proper currency
+    formatting. Engineering shows the highest salaries (~$90K-$100K median), followed
+    by Finance (~$80K-$85K), Marketing (~$72K-$78K), and Sales (~$65K-$72K). A clear
+    legend in the upper right identifies the Gender groups. The title correctly follows
+    the format ''violin-split · seaborn · pyplots.ai''.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, axis labels at 20pt, tick labels at 16pt - all
+          perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, labels well spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Violin shapes are clear, quartile lines visible, appropriate density
+          representation
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue (#306998) and yellow (#FFD43B) are highly distinguishable, colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has 'Annual Salary ($)' with implied units via formatting,
+          X-axis has 'Department'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3), but only horizontal y-axis grid present;
+          legend well-placed with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct split violin plot implementation using seaborn's split=True
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=Department (category), Y=Salary (value), hue=Gender (split_group)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Split violins, distinct colors, legend, inner quartile markers as
+          recommended
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All salary ranges visible, y-axis accommodates full distribution
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows 'Male' and 'Female' with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as 'violin-split · seaborn · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: 'Shows different distribution shapes, varying medians, different
+          spreads across departments. Minor: distributions are relatively similar
+          in shape (all roughly normal)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Salary comparison by gender across departments is a highly relevant
+          real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Salary ranges ($30K-$180K bounds, means $65K-$95K) are realistic
+          for corporate positions
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → plot → styling → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Only used imports: matplotlib.pyplot, numpy, pandas, seaborn'
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn API correctly
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses seaborn's violinplot with split=True and inner='quart', hue
+          parameter, and palette. Good usage but could leverage more seaborn features
+          like set_theme() or additional statistical annotations.
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/altair.yaml b/plots/volcano-basic/metadata/altair.yaml
index b75d26225e..3501663eca 100644
--- a/plots/volcano-basic/metadata/altair.yaml
+++ b/plots/volcano-basic/metadata/altair.yaml
@@ -25,3 +25,178 @@ review:
   - Could use Altair interactive selection features for highlighting genes by category
   - Color palette could be more colorblind-friendly (e.g., using orange instead of
     red)
+  image_description: 'The volcano plot displays a scatter plot with Log₂ Fold Change
+    on the x-axis (ranging from -6 to 6) and -Log₁₀(p-value) on the y-axis (ranging
+    from approximately 0.2 to 4.0). The title reads "volcano-basic · altair · pyplots.ai"
+    in a clear font at the top. Three categories of points are displayed with appropriate
+    colors: red/coral for Up-regulated genes (on the right side), blue for Down-regulated
+    genes (on the left side), and gray for Not Significant genes (in the center).
+    Two vertical dashed threshold lines are positioned at ±1 log2 fold change, and
+    one horizontal dashed threshold line at approximately 1.3 (-log10(0.05)). The
+    legend is positioned in the upper right corner showing "Significance" categories.
+    The plot shows approximately 500 data points with good transparency (alpha) to
+    handle overlapping points.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and tick marks are all clearly readable at full
+          resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized (size=80) with good opacity (0.7) for 500
+          points; slightly smaller markers would suit this density better
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Red/blue/gray palette is distinguishable but red-blue is not ideal
+          for all colorblind types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend positioned appropriately
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with subscript notation (Log₂, Log₁₀)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha 0.3), legend well placed but could be more
+          prominent
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=log2 fold change, Y=-log10(p-value) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has horizontal threshold line at 1.3, vertical lines at ±1, three-color
+          scheme (red/blue/gray)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels Up-regulated, Down-regulated, Not Significant
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "volcano-basic · altair · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both up and down-regulated genes, non-significant genes, varied
+          p-values; could show a few more extreme outliers
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated differential expression results with biologically plausible
+          correlation between fold change and significance
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for omics data; fold change range (-6 to 6)
+          and p-values are appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only uses altair, numpy, pandas (all necessary)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also saves plot.html (not an error, but html
+          wasn't strictly required)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses Altair features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of declarative encoding, Scale, tooltips; could leverage
+          more interactive features or selection
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/bokeh.yaml b/plots/volcano-basic/metadata/bokeh.yaml
index a7a4487e14..d0bf860d0c 100644
--- a/plots/volcano-basic/metadata/bokeh.yaml
+++ b/plots/volcano-basic/metadata/bokeh.yaml
@@ -27,3 +27,180 @@ review:
   - 'Missing optional feature: labeling of top significant genes by name as suggested
     in spec notes'
   - No hover tooltips to identify individual points - a key Bokeh interactive feature
+  image_description: 'The volcano plot displays 2000 simulated gene expression data
+    points. The x-axis shows "Log₂ Fold Change" ranging from approximately -4 to 6,
+    and the y-axis shows "-Log₁₀ (P-value)" ranging from 0 to about 2.6. Three categories
+    of points are color-coded: gray points (Not significant) form a dense cloud in
+    the center/lower portion, blue points (Down-regulated) appear in the upper-left
+    quadrant representing genes with negative fold changes and high significance,
+    and red points (Up-regulated) appear in the upper-right quadrant representing
+    genes with positive fold changes and high significance. Three dashed threshold
+    lines are present: a horizontal line at approximately y=1.3 (p-value threshold
+    of 0.05) and vertical lines at x=±1 (fold change thresholds). The legend is positioned
+    in the top-right corner with a light background. The overall background is a subtle
+    light gray (#FAFAFA).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 36pt, axis labels at 28pt, tick labels at 22pt - all clearly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers are well-sized for 2000 points (size 18-25 with alpha 0.5-0.7);
+          slight deduction because non-significant points could use slightly more
+          alpha contrast
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses red/blue for up/down which is acceptable but not optimal for
+          colorblind users; gray for non-significant is good
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills adequate space
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (though log values are unitless)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle dotted grid with alpha 0.3, legend well-placed with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = log2 fold change, Y = -log10(p-value) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal threshold line at ~1.3, vertical
+          lines at ±1, color coding for significance, alpha transparency'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes auto-scaled appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all three categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses exact format "volcano-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows up-regulated, down-regulated, and non-significant genes with
+          realistic correlation between fold change magnitude and significance; missing
+          top gene labels as suggested in spec notes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated differential gene expression results - appropriate scientific
+          context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 2000 genes with reasonable fold change (-4 to 6) and p-value ranges;
+          slightly wide range on positive side
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: 'Minor: could use more modern bokeh patterns'
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, Span for threshold lines, figure with scatter
+          - standard Bokeh usage but no advanced features like hover tooltips or interactive
+          callbacks
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/highcharts.yaml b/plots/volcano-basic/metadata/highcharts.yaml
index 208744badb..d02cd83ffd 100644
--- a/plots/volcano-basic/metadata/highcharts.yaml
+++ b/plots/volcano-basic/metadata/highcharts.yaml
@@ -27,3 +27,174 @@ review:
     for overlapping points)
   - Code structure is lengthy due to Selenium screenshot approach, but this is inherent
     to Highcharts
+  image_description: 'The plot displays a volcano plot for differential gene expression
+    analysis. The chart has a white background with a title "volcano-basic · highcharts
+    · pyplots.ai" at the top in bold black text, followed by a subtitle "Differential
+    Gene Expression Analysis". The x-axis shows "log₂(Fold Change)" ranging from approximately
+    -4.75 to 5.75, and the y-axis shows "-log₁₀(p-value)" ranging from 0 to 11. Three
+    series of data points are displayed: gray circles for "Not Significant (323)"
+    points clustered mainly in the center, blue circles for "Down-regulated (85)"
+    points on the left side (negative fold change), and orange circles for "Up-regulated
+    (92)" points on the right side (positive fold change). Vertical dashed lines mark
+    the fold change thresholds at -1 and +1 (labeled "FC = -1" and "FC = +1"), and
+    a horizontal dashed line at approximately y=1.3 marks the p-value significance
+    threshold (labeled "p = 0.05"). The legend is positioned in the upper right corner
+    with clear labeling and counts for each category.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: all text readable, title and axis labels are clear and well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: markers are appropriately sized for data density, though some overlap
+          in dense regions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: blue/orange/gray palette is colorblind-safe, avoids red-green
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: good use of canvas space, though margins are quite large
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive axis labels with proper notation (log₂, log₁₀)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: subtle grid lines, well-positioned legend with counts
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct scatter/volcano plot type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: log2 fold change on x-axis, -log10 p-value on y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: includes threshold lines at ±1 FC, p=0.05, color coding by significance
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: axes show all data points appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: legend labels correct with counts
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses correct format "volcano-basic · highcharts · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: shows significant up/down regulated and non-significant points, good
+          distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: differential gene expression is a realistic scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: fold changes and p-values are realistic for gene expression data
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: code is linear but longer than necessary due to Highcharts/Selenium
+          complexity
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) is set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: all imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: uses strict=True in zip, which requires Python 3.10+
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: uses Highcharts annotations and plotLines, but no interactive features
+          showcased
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/letsplot.yaml b/plots/volcano-basic/metadata/letsplot.yaml
index f5c663c8c1..87edd18b82 100644
--- a/plots/volcano-basic/metadata/letsplot.yaml
+++ b/plots/volcano-basic/metadata/letsplot.yaml
@@ -22,3 +22,177 @@ review:
   - Correct title format following spec-id · library · pyplots.ai convention
   weaknesses:
   - Legend title says Status instead of Significance for better clarity
+  image_description: 'The plot displays a volcano plot with log2 fold change on the
+    x-axis (ranging from approximately -3.5 to 3.5) and -log10(p-value) on the y-axis
+    (ranging from 0 to 3.6). Points are colored in three categories: blue for down-regulated
+    genes (left side, above threshold), gray for non-significant genes (center, mostly
+    below threshold), and red for up-regulated genes (right side, above threshold).
+    Dashed vertical threshold lines are present at -1 and +1 fold change, and a horizontal
+    dashed threshold line at ~1.3 (-log10(0.05)). The title reads "volcano-basic ·
+    letsplot · pyplots.ai" and a legend on the right shows "Status" with Down-regulated
+    (blue), Not significant (gray), and Up-regulated (red). The layout is clean with
+    good use of whitespace and the minimal theme provides an uncluttered background.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is bold and large, axis labels are clearly readable, tick labels
+          are appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels are clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Points are well-sized with good alpha transparency (0.7) for the
+          500 data points, though slightly larger points could improve visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/gray/red palette is colorblind-friendly and has excellent contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, plot fills most of the space with balanced
+          margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels: "Log2 Fold Change" and "-Log10(p-value)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: No visible grid lines (though theme_minimal was used, grid is not
+          apparent in output)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot with scatter points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=log2_fold_change, Y=neg_log10_pvalue correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal threshold at ~1.3, vertical
+          thresholds at ±1, color coding by significance status'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data points visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all three categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "volcano-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows up-regulated, down-regulated, and non-significant genes; good
+          distribution across quadrants
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated differential expression data is a realistic scenario for
+          volcano plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are appropriate for gene expression data, though the p-value
+          distribution could be slightly more varied
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_point, scale_color_manual, theme_minimal,
+          and ggsave with scale parameter; could use more advanced features like tooltips
+          for interactivity
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/matplotlib.yaml b/plots/volcano-basic/metadata/matplotlib.yaml
index 046d58498d..9224a47449 100644
--- a/plots/volcano-basic/metadata/matplotlib.yaml
+++ b/plots/volcano-basic/metadata/matplotlib.yaml
@@ -27,3 +27,178 @@ review:
     or custom colormaps
   - Legend placement in upper right could potentially overlap with high-significance
     up-regulated points
+  image_description: 'The plot shows a volcano plot displaying differential gene expression
+    data. The x-axis shows "Log₂ Fold Change" ranging from approximately -6 to 6,
+    and the y-axis shows "-Log₁₀ (p-value)" ranging from 0 to 6. Points are colored
+    in three categories: gray for "Not significant" (clustered in the center below
+    the threshold lines), blue (#306998, Python Blue) for "Down-regulated" genes (left
+    side), and gold/yellow (#FFD43B, Python Yellow) for "Up-regulated" genes (right
+    side). Three dashed threshold lines are present: one horizontal line at y≈1.3
+    (p-value significance cutoff) and two vertical lines at x=±1 (fold change cutoffs).
+    Top significant genes are labeled on both sides (Gene_1971, Gene_262, Gene_668,
+    Gene_1591, Gene_1539 on the left; Gene_1957, Gene_1615, Gene_209, Gene_179, Gene_478
+    on the right). The title follows the correct format "volcano-basic · matplotlib
+    · pyplots.ai". Legend is positioned in the upper right. Grid lines are subtle
+    with dashed style and low alpha.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title 24pt, axis labels 20pt, ticks 16pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, gene labels well positioned with offsets
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Markers visible with appropriate alpha, but sizes could be slightly
+          larger per guidelines (s=50/80 used, could be 100-200 for 2000 points)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and gold provide good contrast, though not a standard colorblind-safe
+          palette (blue-yellow is generally accessible but not optimal)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, good margins, symmetric x-axis limits
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with subscript formatting (Log₂, Log₁₀)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but legend could be better positioned
+          to not overlap the data area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot with fold change vs significance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: log2 fold change on x-axis, -log10(p-value) on y-axis as specified
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: threshold lines (horizontal at 1.3, vertical
+          at ±1), color coding by significance, gene labels on top features'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data with proper padding (1.1x multiplier)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly identifies all three categories
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "volcano-basic · matplotlib · pyplots.ai"'
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: non-significant cluster, up-regulated, down-regulated,
+          high significance outliers, threshold regions'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated differential gene expression data, scientifically plausible
+          with realistic p-value distribution
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 2000 genes, log2 fold changes centered at 0 with realistic spread,
+          p-values correlated with effect size
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib and numpy, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses standard scatter and annotation, no distinctive matplotlib features
+          like custom colormaps, PathEffects, or advanced annotations
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/plotly.yaml b/plots/volcano-basic/metadata/plotly.yaml
index 61965a128d..a5a8f0d81c 100644
--- a/plots/volcano-basic/metadata/plotly.yaml
+++ b/plots/volcano-basic/metadata/plotly.yaml
@@ -27,3 +27,179 @@ review:
     smaller markers or more transparency
   - Could utilize more Plotly-specific features like updatemenus or rangeslider for
     enhanced interactivity
+  image_description: 'The plot displays a volcano plot with approximately 500 data
+    points representing simulated gene expression data. The x-axis shows "log₂ Fold
+    Change" ranging from approximately -4 to 6, and the y-axis shows "-log₁₀(p-value)"
+    ranging from 0 to about 3.5. Points are colored in three categories: gray for
+    "Not Significant" (majority of points clustered in the center), blue (#306998)
+    for "Down-regulated" genes (left side, above thresholds), and orange (#D35400)
+    for "Up-regulated" genes (right side, above thresholds). Dashed threshold lines
+    are clearly visible: horizontal line at p = 0.05 (~1.3 on y-axis) and vertical
+    lines at log2FC = ±1. Three top significant genes are labeled with arrows (Gene_374,
+    Gene_346, Gene_236). The legend is positioned in the upper-left with a semi-transparent
+    white background. The overall layout uses the plotly_white template with subtle
+    gridlines.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt, axis labels at 22pt, ticks at 18pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; gene annotations well-positioned with
+          arrows
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Marker sizes appropriate (10-12px), opacity levels good (0.5-0.8),
+          though some central overlap in gray points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue/orange/gray palette is colorblind-safe (no red-green only distinction)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, legend well-placed
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Descriptive labels with proper subscript notation (log₂, log₁₀)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle but legend entries for threshold lines add clutter;
+          could be simplified
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X = log2 fold change, Y = -log10(p-value) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal threshold at p=0.05, vertical
+          thresholds at ±1, color-coded significance, top gene labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly describe each category
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "volcano-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows up-regulated, down-regulated, and non-significant genes; good
+          spread of significance values; could show more extreme p-values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression scenario is realistic and neutral; appropriate for
+          differential expression analysis
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: log2FC range (-4 to 6) and p-value range reasonable, though p-values
+          could extend lower for more dramatic top hits
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png at correct resolution (4800×2700 via scale=3)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of hover templates with custom formatting, interactive HTML
+          export, but could leverage more Plotly-specific features like custom hover
+          data or range sliders
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/plotnine.yaml b/plots/volcano-basic/metadata/plotnine.yaml
index ce1be19366..fe4d8b8fbd 100644
--- a/plots/volcano-basic/metadata/plotnine.yaml
+++ b/plots/volcano-basic/metadata/plotnine.yaml
@@ -25,3 +25,177 @@ review:
   - Could use colorblind-safe palette instead of red-blue (consider using orange or
     different saturation)
   - Grid lines not visible (theme_minimal removes them) which could aid reading values
+  image_description: 'The volcano plot displays a scatter plot with Log2 Fold Change
+    on the x-axis (ranging from approximately -3 to 4) and -Log10(p-value) on the
+    y-axis (ranging from 0 to 4). Three distinct groups of points are visible: blue
+    points on the left representing down-regulated genes clustered around x = -2.5
+    with high significance (y > 2), gray points in the center showing non-significant
+    genes spread across x = -2 to 2 with low significance (y < 1.3), and red points
+    on the right representing up-regulated genes clustered around x = 2.5 with high
+    significance (y > 2). Dashed threshold lines are present: horizontal line at y
+    ≈ 1.3 (significance cutoff) and vertical lines at x = -1 and x = 1 (fold change
+    thresholds). Six gene labels are annotated (Gene_416, Gene_415, Gene_443 on the
+    left; Gene_469, Gene_478, Gene_475 on the right). The legend shows "Significance"
+    with three categories. Title follows the required format.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor overlap between Gene_469 and Gene_478 labels on the right side
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Point size=3 with alpha=0.7 is well-suited for 500 data points
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and red are distinguishable but red-blue is not fully colorblind-safe;
+          gray provides good contrast for non-significant
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive but could include units or more detail
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: No visible grid (minimal theme), legend well-placed on right
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot with fold change vs significance
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=log2FC, Y=-log10(p-value) correctly assigned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Horizontal threshold at -log10(0.05), vertical thresholds at ±1,
+          color by significance, gene labels present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Down-regulated, Not significant, Up-regulated
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: volcano-basic · plotnine · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows up-regulated, down-regulated, and non-significant genes; good
+          distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression data is a perfect real-world scenario for volcano
+          plots
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Reasonable fold changes and p-values for gene expression
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear structure: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) used
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses 'size' parameter in geom_hline/geom_vline which should be 'linewidth'
+          in newer versions
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of plotnine's grammar of graphics (ggplot, aes, geom layers,
+          scale_color_manual, theme_minimal), but could leverage more advanced features
+          like faceting or stat layers
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/pygal.yaml b/plots/volcano-basic/metadata/pygal.yaml
index 91947c99c4..e73bb28bcc 100644
--- a/plots/volcano-basic/metadata/pygal.yaml
+++ b/plots/volcano-basic/metadata/pygal.yaml
@@ -28,3 +28,179 @@ review:
     configured
   - Red/blue color scheme is not colorblind-safe; consider using colorblind-friendly
     palette
+  image_description: 'The plot displays a volcano plot with the title "volcano-basic
+    · pygal · pyplots.ai" at the top. The X-axis shows "Log₂ Fold Change" ranging
+    from -5 to 5, and the Y-axis shows "-Log₁₀(p-value)" ranging from -5 to 5. Data
+    points are scattered across the plot in three colors: gray points (Not Significant)
+    clustered near the center around x=0 with low y-values (0-1.5), blue points (Down-regulated)
+    in the upper-left quadrant around x=-2.5 to -3.5 with y-values of 2-3.5, and red
+    points (Up-regulated) in the upper-right quadrant around x=1.5 to 3.5 with y-values
+    of 1.5-4. A legend appears in the top-left corner showing "Not Significant", "Up-regulated",
+    and "Down-regulated". The plot has subtle grid lines. Notably, threshold lines
+    specified in the spec (horizontal at y=1.3 and vertical at x=±1) are NOT visible
+    in the rendered output despite being coded.'
+  criteria_checklist:
+    visual_quality:
+      score: 29
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Title and axis labels are readable but slightly small for the canvas
+          size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 8
+        passed: true
+        comment: Points are visible but quite small; density is manageable but markers
+          could be larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses red/blue which is not ideal for colorblind users, but gray provides
+          contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 2
+        max: 5
+        passed: true
+        comment: Y-axis extends to -5 when all data is above 0, wasting ~50% of vertical
+          canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses subscript notation "Log₂ Fold Change" and "-Log₁₀(p-value)"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is positioned in top-left corner away from data, and threshold
+          lines are missing from render
+    spec_compliance:
+      score: 18
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct XY scatter plot for volcano visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: log2 fold change on X, -log10(p-value) on Y
+      - id: SC-03
+        name: Required Features
+        score: 2
+        max: 5
+        passed: false
+        comment: 'Missing threshold lines: The spec requires horizontal line at
+          y=1.3 and vertical lines at x=±1. These are coded but do not appear in the
+          rendered output'
+      - id: SC-04
+        name: Data Range
+        score: 1
+        max: 3
+        passed: true
+        comment: Y-axis extends into negative values (-5 to 5) when data is all positive
+          (0 to ~4), poor range selection
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify categories
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: true
+        comment: Title format is correct "volcano-basic · pygal · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all three categories: non-significant (gray), up-regulated
+          (red), down-regulated (blue)'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Gene expression differential analysis is a realistic scientific scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 3
+        max: 5
+        passed: true
+        comment: Values are reasonable for omics data, but the y-axis range including
+          negative values is inappropriate
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean linear structure with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: true
+        comment: Imports Style but threshold lines do not render correctly
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Code attempts threshold lines but they fail to render
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses pygal XY chart with custom Style, tooltips with gene names,
+          dashed stroke styles attempted
+  verdict: APPROVED
diff --git a/plots/volcano-basic/metadata/seaborn.yaml b/plots/volcano-basic/metadata/seaborn.yaml
index 2e9f8bcd05..4817ba7a41 100644
--- a/plots/volcano-basic/metadata/seaborn.yaml
+++ b/plots/volcano-basic/metadata/seaborn.yaml
@@ -27,3 +27,179 @@ review:
     of blue-red)
   - Missing optional gene labels for top significant features mentioned in the spec
     notes
+  image_description: 'The plot displays a volcano plot with 500 data points representing
+    simulated differential gene expression data. The x-axis shows "Log2 Fold Change"
+    ranging from approximately -3.5 to 4.5, and the y-axis shows "-Log10(p-value)"
+    ranging from 0 to about 7. Three categories of points are shown: gray points for
+    "Not Significant" genes clustered around the center with low significance, blue points
+    for "Down-regulated" genes on the left side (negative fold change, high significance),
+    and red/coral points for "Up-regulated" genes on the right side (positive fold
+    change, high significance). Horizontal dashed threshold line at p=0.05 (y≈1.3)
+    and vertical dashed lines at FC=0.5 and FC=2 (log2FC=±1) are clearly visible.
+    The legend is positioned in the upper right corner. The title follows the correct
+    format: "volcano-basic · seaborn · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers well-sized (s=100) with alpha=0.7, appropriate for 500 points.
+          Slight deduction as some gray points overlap heavily in the center
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Blue and red/coral colors are distinguishable, though red-blue isn't
+          the most colorblind-safe combination (could use viridis-based colors)
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Descriptive labels but no units (though technically log values are
+          unitless)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), legend well-placed but slightly overlaps
+          plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct volcano plot with scatter points
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X=log2 fold change, Y=-log10(p-value) correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: horizontal threshold at p=0.05, vertical
+          thresholds at ±1, three-color significance coding'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correct and meaningful
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "volcano-basic · seaborn · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows up-regulated, down-regulated, and non-significant genes well.
+          Missing gene labels for top hits as suggested in spec (optional feature)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulated differential expression data with realistic distribution
+          pattern (volcano shape)
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for omics data, though -log10(p-value) reaching
+          ~6 suggests some very low p-values
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean flow: imports → data → plot → save, no functions/classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern seaborn API with hue parameter
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: 'Saves as plot.png; minor: could include a commented-out plt.show()
+          for interactive use'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses seaborn's scatterplot with hue categorization
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of seaborn's hue and palette features, but doesn't leverage
+          more advanced seaborn features like FacetGrid or statistical annotations
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/altair.yaml b/plots/waffle-basic/metadata/altair.yaml
index 0c533e63e5..301d38cfea 100644
--- a/plots/waffle-basic/metadata/altair.yaml
+++ b/plots/waffle-basic/metadata/altair.yaml
@@ -24,3 +24,176 @@ review:
   - Could include a smaller category (<5%) to better demonstrate waffle chart capability
     with small proportions
   - Legend symbol size (500) is quite large relative to the actual grid squares
+  image_description: 'The plot displays a 10x10 waffle chart representing budget allocation
+    across 4 categories. The grid contains 100 squares with rounded corners and white
+    borders between them. Colors used: dark blue (#306998) for Marketing (35 squares
+    at the bottom), yellow (#FFD43B) for Operations (28 squares), teal/turquoise (#4ECDC4)
+    for R&D (25 squares), and coral/salmon (#E76F51) for HR (12 squares at the top).
+    The title "Budget Allocation · waffle-basic · altair · pyplots.ai" appears at
+    the top. A legend on the right side shows each category with its percentage in
+    parentheses. The chart fills from bottom-left to top-right, row by row. The overall
+    layout is clean with good use of space.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt is clearly readable, legend title at 24pt and labels
+          at 20pt are excellent
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean separation between grid squares and
+          legend
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Grid squares are well-sized with good stroke width (3px) and corner
+          radius (6px)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors with good contrast; blue, yellow, teal, coral
+          are distinguishable for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout but slight imbalance with legend area having more whitespace
+          than needed
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for waffle charts (no axes); the spec does not require them
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on right with percentages included, grid styling
+          is clean
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart with 10x10 grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to colored squares proportionally
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 10x10 grid (100 squares), distinct colors, legend with percentages,
+          values sum to 100
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares shown, all categories represented
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows correct category names with accurate percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{description} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 categories with varying proportions, but could show more
+          diversity (e.g., a very small category <5%)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world use case for waffle charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic (35%, 28%, 25%, 12% summing to 100%), though
+          smallest category is 12%
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → grid building → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html which is correct for Altair
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses mark_rect with encoding, labelExpr for dynamic legend labels,
+          but could leverage more Altair-specific features like selections or layering
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/bokeh.yaml b/plots/waffle-basic/metadata/bokeh.yaml
index 2a2d73b12f..3a7ae05eda 100644
--- a/plots/waffle-basic/metadata/bokeh.yaml
+++ b/plots/waffle-basic/metadata/bokeh.yaml
@@ -27,3 +27,175 @@ review:
     on hover
   - Yellow (#FFD43B) and orange (#E34F26) could be more distinct for better colorblind
     accessibility
+  image_description: 'The plot displays a 10×10 waffle chart (100 squares) representing
+    programming language preferences. The grid is filled from bottom-to-top, left-to-right
+    with: Python in blue (#306998) occupying 42 squares at the bottom, JavaScript
+    in yellow (#FFD43B) with 28 squares in the middle, Java in orange (#E34F26) with
+    18 squares above that, and Other in purple (#7B68EE) with 12 squares at the top.
+    The title "Programming Language Preferences · waffle-basic · bokeh · pyplots.ai"
+    appears centered at the top. A legend on the right side lists all categories with
+    their percentages. The background is a subtle light gray (#fafafa), and squares
+    have white borders creating visual separation.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt is excellent, legend at 24pt is readable but could
+          be slightly larger for optimal viewing
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Squares are well-sized with appropriate gaps and alpha
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Colors are distinct but yellow and orange could be confused by some
+          colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, waffle grid is well-centered with legend appropriately
+          placed
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for waffle charts, axes correctly hidden
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend is functional but positioned far from the chart; legend items
+          could be closer to the grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart with 10×10 grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to squares proportionally
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 10×10 grid, distinct colors, legend with
+          percentages'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares accounted for
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{topic} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 4 categories with varying proportions; could show more variation
+          in sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language survey is a relatable, realistic scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Percentages sum to 100 and are plausible, though distribution could
+          be more interesting
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → grid building → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Data is deterministic but no random seed comment (not needed here,
+          but the values are hardcoded which is fine)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource and proper Bokeh Legend construction; could
+          leverage hover tooltips for interactivity
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/highcharts.yaml b/plots/waffle-basic/metadata/highcharts.yaml
index c68f93ad94..af06b21857 100644
--- a/plots/waffle-basic/metadata/highcharts.yaml
+++ b/plots/waffle-basic/metadata/highcharts.yaml
@@ -24,3 +24,174 @@ review:
   weaknesses:
   - Category names are generic (Product A/B/C/D) rather than a realistic scenario
     like budget allocation or survey responses
+  image_description: 'The plot displays a 10x10 waffle chart grid representing market
+    share distribution across four product categories. The chart uses a clean white
+    background with the title "waffle-basic · highcharts · pyplots.ai" centered at
+    the top. The grid is filled from bottom to top: Product A (dark blue #306998)
+    occupies the bottom 4+ rows (42 squares), Product B (yellow #FFD43B) fills the
+    next 2-3 rows (28 squares), Product C (green #4DAF4A) takes approximately 2 rows
+    (18 squares), and Product D (pink #E377C2) fills the top portion (12 squares).
+    Each square has a white border creating clear separation. A vertical legend on
+    the right side displays category names with their percentages: "Product A (42%)",
+    "Product B (28%)", "Product C (18%)", "Product D (12%)".'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 48px is clearly readable, legend text at 32px is well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Square markers at radius 95 create excellent waffle grid visibility
+          with white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe palette using blue, yellow, green, and pink (no red-green
+          conflict)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with grid well-centered, though slight asymmetry with
+          legend placement
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for waffle charts (axes hidden by design)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed on the right, clearly shows categories with percentages
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart using 10x10 grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to grid squares (42+28+18+12=100)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 10x10 grid, distinct colors, legend with
+          percentages'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares shown, values sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows all categories with correct percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses exact format: "waffle-basic · highcharts · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 categories with varying proportions, though could show more
+          diverse sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Market share distribution is plausible but generic "Product A/B/C/D"
+          names
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100, realistic market share values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → grid creation → chart → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (fixed values, no random seed needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, highcharts, selenium, etc.)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ScatterSeries with square markers creatively to create waffle
+          effect, but this is a workaround rather than a native Highcharts feature
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/letsplot.yaml b/plots/waffle-basic/metadata/letsplot.yaml
index b2840bef7a..60ea1989fb 100644
--- a/plots/waffle-basic/metadata/letsplot.yaml
+++ b/plots/waffle-basic/metadata/letsplot.yaml
@@ -25,3 +25,172 @@ review:
     features
   - Canvas utilization could be slightly improved - waffle grid appears a bit small
     relative to total canvas
+  image_description: 'The plot displays a 10x10 waffle chart (100 squares total) representing
+    market share by product category. The grid uses four distinct colors: **Python
+    Blue (#306998)** for Product A occupying the top 42 squares (42%), **yellow (#FFD43B)**
+    for Product B taking 28 squares, **green (#4CAF50)** for Product C with 18 squares,
+    and **coral/orange (#FF7043)** for Product D with 12 squares at the bottom. The
+    squares are arranged in a clean grid with small gaps between them. The title "waffle-basic
+    · letsplot · pyplots.ai" appears centered at the top. A well-formatted legend
+    on the right side shows each category with its percentage (e.g., "Product A (42%)").
+    The layout uses a white/void background with the waffle grid centered.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, legend title at 18pt, legend text at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Squares are well-sized with proper 0.9 width/height creating visible
+          gaps
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors (blue, yellow, green, orange) easily distinguishable,
+          colorblind-safe
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good centering, but waffle grid could utilize slightly more canvas
+          space
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for waffle charts (uses theme_void appropriately)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed, clean void theme
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart using 10x10 grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to colored squares
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features: 10x10 grid, distinct colors, legend with percentages'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares filled, values sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows category names with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "waffle-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 categories with varying proportions, though could show more
+          variety in sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share scenario is realistic and relatable
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, values are plausible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Uses deterministic data (no random), but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses lets-plot features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of geom_tile, coord_fixed, theme_void, scale_fill_manual,
+          but no interactive tooltips leveraged
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/matplotlib.yaml b/plots/waffle-basic/metadata/matplotlib.yaml
index 294f19ffcd..c66efbfa15 100644
--- a/plots/waffle-basic/metadata/matplotlib.yaml
+++ b/plots/waffle-basic/metadata/matplotlib.yaml
@@ -25,3 +25,174 @@ review:
   - Data could include a very small category to show how the chart handles edge cases
   - Could use additional matplotlib features like annotating total percentage per
     category on the chart
+  image_description: 'The plot displays a 10x10 waffle chart with 100 rounded squares
+    arranged in a grid. The chart uses four distinct colors: deep blue (#306998) for
+    Product A occupying the bottom 42 squares (4.2 rows), golden yellow (#FFD43B)
+    for Product B taking the next 28 squares (2.8 rows), bright green (#4DAF4A) for
+    Product C with 18 squares, and pink (#E377C2) for Product D filling the top 12
+    squares. Each square has subtle rounded corners and white gaps between them for
+    visual separation. The title "waffle-basic · matplotlib · pyplots.ai" appears
+    centered at the top in large clear font. A well-styled legend with a shadow effect
+    is positioned to the right of the grid, showing each category with its percentage
+    value (e.g., "Product A (42%)"). The layout is clean with good use of whitespace
+    and the chart fills the canvas appropriately.'
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt is perfectly readable, legend text at 18pt is clear
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, all text fully readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Squares are well-sized with good spacing and visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinct (blue, yellow, green, pink) and colorblind-friendly
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good proportions, chart uses canvas space well, legend positioned
+          nicely
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: N/A for waffle charts (no axes), but no descriptive context on the
+          chart itself
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend is well-placed with fancybox and shadow styling
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart implementation with 10x10 grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly map to square counts (42, 28, 18, 12 = 100)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 10x10 grid, distinct colors, legend with
+          percentages'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, grid complete
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 0
+        max: 2
+        passed: false
+        comment: Title uses correct format but could include a subtitle describing
+          the data context
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 4 categories with varying proportions, but could demonstrate
+          more variety (e.g., very small category)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Market share distribution is a plausible real-world scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages sum to 100%, values are realistic for market share
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib.patches, pyplot, numpy)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct settings
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses FancyBboxPatch with rounded corners, but could leverage more
+          matplotlib-specific features
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/plotly.yaml b/plots/waffle-basic/metadata/plotly.yaml
index 19ffb5d8fe..4fb2bcaf6a 100644
--- a/plots/waffle-basic/metadata/plotly.yaml
+++ b/plots/waffle-basic/metadata/plotly.yaml
@@ -26,3 +26,163 @@ review:
     small relative to available space)
   - Could add hover interactivity to show category details when hovering over squares
     (Plotly strength not utilized)
+  image_description: 'The plot displays a 10x10 waffle chart (100 squares) representing
+    budget allocation across 5 categories. From bottom to top: dark blue squares for
+    Operations (35%), golden yellow squares for Marketing (25%), teal/cyan squares
+    for R&D (22%), coral/orange squares for HR (12%), and purple squares for Other
+    (6%). The title "Budget Allocation · waffle-basic · plotly · pyplots.ai" is centered
+    at the top in black text. A horizontal legend below the grid shows all categories
+    with their percentages. The squares have white borders separating them, and the
+    overall background is white.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title and legend text are large and clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements anywhere
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Squares are well-sized with clear white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colors are distinct and colorblind-friendly (blue, yellow, teal,
+          orange, purple)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Grid is centered but could use more canvas space; some empty areas
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed horizontally below chart with clear labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart with 10x10 grid
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to square counts
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 10x10 grid, distinct colors, legend with
+          percentages'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares filled, values sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{topic} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying proportions; could benefit from more
+          contrast in sizes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world use case for waffle charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Percentages are realistic for budget categories
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → grid creation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: No random seed needed (deterministic data), but numpy imported unnecessarily
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with square markers and proper layout; could leverage
+          more Plotly-specific features like hover info
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/plotnine.yaml b/plots/waffle-basic/metadata/plotnine.yaml
index a183b131ee..17d0e743de 100644
--- a/plots/waffle-basic/metadata/plotnine.yaml
+++ b/plots/waffle-basic/metadata/plotnine.yaml
@@ -15,3 +15,60 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  image_description: |-
+    The plot displays a 10x10 waffle grid (100 squares total) representing budget allocation across 5 categories. The grid is filled from top-left, reading left-to-right then row-by-row downward. Categories are:
+    - **Housing (35%)**: Dark blue (#306998) - occupies 35 squares in the top portion
+    - **Food (25%)**: Yellow (#FFD43B) - occupies 25 squares in the middle
+    - **Transport (18%)**: Teal/cyan (#4ECDC4) - occupies 18 squares
+    - **Entertainment (12%)**: Coral/orange-red (#E76F51) - occupies 12 squares
+    - **Savings (10%)**: Gray (#95A5A6) - occupies 10 squares at the bottom
+
+    The title "waffle-basic · plotnine · pyplots.ai" is centered at the top. A legend on the right shows each category with its percentage. The squares have white borders separating them. No axis labels or ticks are shown (appropriate for this chart type). The overall layout is clean with good proportions.
+  criteria_checklist:
+    code_quality:
+      score: 13
+      max: 15
+      items:
+      - id: CQ-01
+        name: KISS structure
+        score: 4
+        max: 4
+        passed: true
+        comment: Simple imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducible
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Library idioms
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses plotnine ggplot syntax correctly
+      - id: CQ-04
+        name: Clean imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-05
+        name: Helpful comments
+        score: 0
+        max: 1
+        passed: false
+        comment: Data section has comment, but grid logic could use explanation
+      - id: CQ-06
+        name: No deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current plotnine API
+      - id: CQ-07
+        name: Output correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/pygal.yaml b/plots/waffle-basic/metadata/pygal.yaml
index c7a9af9f5d..2d0871752f 100644
--- a/plots/waffle-basic/metadata/pygal.yaml
+++ b/plots/waffle-basic/metadata/pygal.yaml
@@ -23,3 +23,169 @@ review:
   - Uses a custom class instead of KISS script structure (pygal lacks built-in waffle
     chart, so this is justified)
   - Slight horizontal whitespace imbalance - could use more of the canvas width
+  image_description: 'The plot displays a 10x10 waffle chart (100 squares total) representing
+    budget allocation across 4 categories. The grid is centered on a white background
+    with the title "Budget Allocation · waffle-basic · pygal · pyplots.ai" at the
+    top. The squares have rounded corners and are colored: blue (#306998) for Operations
+    (42 squares, top portion), yellow (#FFD43B) for Marketing (28 squares, middle
+    portion), teal (#4ECDC4) for R&D (18 squares, lower-middle), and coral/salmon
+    (#FF6B6B) for Admin (12 squares, bottom). A legend at the bottom shows each category
+    with its percentage in parentheses: "Operations (42%)", "Marketing (28%)", "R&D
+    (18%)", "Admin (12%)". The layout is clean and well-proportioned.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title and legend text are clearly readable at the 4800x2700 resolution
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements, clean layout
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Squares are well-sized with appropriate gaps, easy to count
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Four distinct colors that are colorblind-friendly (blue, yellow,
+          teal, coral)
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good centering but some wasted space on left/right margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for waffle charts (no axes)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend well-placed at bottom with clear category labels
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Values correctly mapped to square counts (42+28+18+12=100)
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 10x10 grid, percentages in legend, distinct colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares visible and accounted for
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Follows "{description} · {spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows proportional representation well, but only 4 categories (spec
+          suggests 2-6)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world use case mentioned in spec
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sum to exactly 100%, realistic budget proportions
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses a custom class (Waffle) instead of simple script structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data, no random elements
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png (and plot.svg, plot.html)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Extends pygal's Graph class, uses pygal's SVG rendering, Style system,
+          and interactive HTML export
+  verdict: APPROVED
diff --git a/plots/waffle-basic/metadata/seaborn.yaml b/plots/waffle-basic/metadata/seaborn.yaml
index fa0413305c..dc855eac3c 100644
--- a/plots/waffle-basic/metadata/seaborn.yaml
+++ b/plots/waffle-basic/metadata/seaborn.yaml
@@ -23,3 +23,170 @@ review:
   - Legend could be positioned closer to the grid (large gap between chart and legend)
   - Grid fills top-to-bottom which is less intuitive than bottom-to-top (like filling
     a glass)
+  image_description: 'The plot displays a 10x10 waffle chart (100 squares) representing
+    budget allocation across 5 categories. The grid fills from the top-left corner,
+    moving left-to-right and top-to-bottom. Colors used: Blue (#306998) for Housing
+    (35 squares), Yellow (#FFD43B) for Food (25 squares), Green (#4CAF50) for Transportation
+    (20 squares), Orange (#FF7043) for Utilities (12 squares), and Purple (#9C27B0)
+    for Entertainment (8 squares). The title "waffle-basic · seaborn · pyplots.ai"
+    appears at the top in bold. A legend below the chart shows all categories with
+    their percentages. White grid lines separate each square. The plot uses a square
+    aspect ratio (12x12 inches) which is appropriate for waffle charts.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 28pt is clearly readable, legend at 18pt is well-sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Squares are well-sized with clear white borders
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distinct colors, though blue and purple could be hard to tell
+          apart for some colorblind types
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Square format is perfect for waffle chart, good use of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for waffle charts (no axes needed, correctly hidden)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend well placed below chart, but could be closer to the grid
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waffle chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories correctly mapped to colored squares
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 10x10 grid, distinct colors, legend with percentages
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 100 squares filled, values sum to 100%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all categories with percentages
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format `waffle-basic · seaborn · pyplots.ai`
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 5 categories with varying proportions, good range from 8% to
+          35%
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Budget allocation is a perfect real-world use case for waffle charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for household budget, though Housing at 35%
+          could be higher for realism
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → grid creation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation needed)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current seaborn/matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.heatmap creatively for waffle visualization, though heatmap
+          is a workaround since seaborn lacks native waffle support
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/altair.yaml b/plots/waterfall-basic/metadata/altair.yaml
index dfb21efa8f..f6dced98e8 100644
--- a/plots/waterfall-basic/metadata/altair.yaml
+++ b/plots/waterfall-basic/metadata/altair.yaml
@@ -26,3 +26,180 @@ review:
     should show "$500" without the plus sign
   - No legend explaining the color scheme (blue=total, green=positive, red=negative)
   - Could benefit from interactive tooltips showing additional context
+  image_description: 'The plot displays a waterfall chart showing a quarterly financial
+    breakdown from Revenue to Net Income. The chart has 7 bars: Revenue (blue, starting
+    at $500), Cost of Goods (red, showing -$200 decrease), Gross Profit (blue subtotal
+    at $300), Operating Expenses (red, -$150), Other Income (green, +$25), Taxes (red,
+    -$45), and Net Income (blue final total at $130). Each bar shows a value label
+    in white text centered on the bar. Dashed connector lines connect the running
+    totals between bars. The color scheme uses Python Blue (#306998) for totals, green
+    (#4CAF50) for positive changes, and red (#E53935) for negative changes. The title
+    "waterfall-basic · altair · pyplots.ai" appears at the top. Y-axis shows "Amount
+    ($)" and X-axis shows "Category" with slightly angled labels.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text is clearly readable; title is 28pt, axis labels 18-22pt.
+          Slightly smaller than ideal but very legible.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; x-axis labels are angled to prevent overlap.
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar sizes are well-proportioned (size=65), white stroke provides
+          good separation.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Green/red color coding is common but uses distinct shades; blue totals
+          help differentiate. Could be improved with patterns for colorblind users.
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Chart fills canvas well with balanced margins.
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Amount ($)" with units, X-axis has "Category" which is
+          descriptive.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Subtle grid with alpha=0.3 and dashed lines. No legend needed as
+          colors are intuitive, but a small legend could help clarify the color scheme.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waterfall chart type.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, values on Y correctly assigned.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has positive/negative coloring, connector lines, total bars, and
+          value labels.
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show all data from 0 to 500.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend, but color scheme is self-explanatory with totals in blue.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "waterfall-basic · altair · pyplots.ai".'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive changes, negative changes, and subtotals. Good variety;
+          the "Gross Profit" subtotal label was initially suspected of showing +$500,
+          but on inspection it correctly shows $300.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly financial breakdown is a classic waterfall use case with
+          realistic categories.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for a business scenario ($500 revenue down
+          to $130 net income), though scale is simplified.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: No functions or classes; follows imports → data → plot → save pattern.
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (hardcoded values), no random seed needed, but
+          being explicit about the determinism would be better.
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair and pandas imported, both used.
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of Altair's layering (connectors, bars, labels), encoding
+          with y/y2, and declarative style. Could leverage more Altair-specific features
+          like tooltips or interactive selection.
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/bokeh.yaml b/plots/waterfall-basic/metadata/bokeh.yaml
index 7ad48824b0..dab953fafb 100644
--- a/plots/waterfall-basic/metadata/bokeh.yaml
+++ b/plots/waterfall-basic/metadata/bokeh.yaml
@@ -26,3 +26,177 @@ review:
   - Missing HoverTool for interactivity (Bokeh key strength)
   - Red-green color scheme could be improved for colorblind accessibility (consider
     blue-orange)
+  image_description: 'The waterfall chart displays a quarterly financial breakdown
+    from "Starting Revenue" ($150,000) to "Net Income" ($130,000). Seven bars are
+    shown with distinct coloring: blue (#306998) for total bars (first and last),
+    green (#2ECC71) for positive changes (Product Sales +$50,000, Services +$35,000),
+    and red (#E74C3C) for negative changes (Refunds -$8,000, Operating Costs -$75,000,
+    Marketing -$22,000). Dashed gray connector lines show the cumulative flow between
+    bars. Value labels appear above each bar with appropriate formatting. The title
+    reads "waterfall-basic · bokeh · pyplots.ai" in the top-left. X-axis shows "Financial
+    Category" with rotated category labels, Y-axis shows "Amount ($)" with values
+    from 0 to ~250,000. Background is light gray (#FAFAFA) with subtle horizontal
+    grid lines.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, axis labels at 24pt, tick labels at 18pt, value labels
+          at 20pt - all readable, slight room for improvement on value label contrast
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, category labels properly rotated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars well-sized with good alpha (0.9), white line borders add definition
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Green/red scheme is common for financial data but not fully colorblind-safe;
+          blue for totals helps differentiate
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well, balanced margins, good use of 4800x2700 space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: '"Financial Category" and "Amount ($)" are descriptive with units'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend present (acceptable since colors are intuitive for financial
+          context), but grid is subtle
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waterfall chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X, amounts on Y, correctly positioned floating bars
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'Has: different colors for +/-, connecting lines, start/end totals
+          in blue, value labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis starts at 0, extends to ~250k with 15% padding
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed as color coding is self-explanatory in financial
+          context
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly formatted as "waterfall-basic · bokeh · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both positive and negative changes, totals at start/end; could
+          show more varied magnitudes
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly financial breakdown is a realistic, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are reasonable for a business context ($150k revenue down
+          to $130k net income)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → calculations → plot → style → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: false
+        comment: Data is deterministic (hardcoded), but no comment notes that omitting
+          a random seed is intentional
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, FactorRange, Label annotations, figure styling;
+          could leverage HoverTool for interactivity
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/highcharts.yaml b/plots/waterfall-basic/metadata/highcharts.yaml
index 0fda50433e..acb0574690 100644
--- a/plots/waterfall-basic/metadata/highcharts.yaml
+++ b/plots/waterfall-basic/metadata/highcharts.yaml
@@ -24,3 +24,178 @@ review:
   - Grid lines are very subtle and could be more visible for easier value reading
   - Data labels on smaller bars appear somewhat cramped
   - X-axis Category label is not particularly informative for this financial context
+  image_description: 'The plot displays a waterfall chart showing a quarterly financial
+    breakdown. The chart starts with a tall blue "Revenue" bar at $500,000, followed
+    by orange bars representing decreases: Product Costs (-$150,000), Operating Expenses
+    (-$80,000), Marketing (-$45,000), R&D (-$35,000), and Taxes (-$52,000). There
+    is one teal/cyan bar for "Other Income" (+$20,000) representing an increase. The
+    final blue bar shows "Net Income" at $158,000. Connecting dotted lines link the
+    bars to show cumulative flow. Data labels appear on each bar showing the change
+    amount with +/- prefixes. The y-axis shows "Amount ($)" with values from $0 to
+    $540,000. X-axis labels are rotated at -45 degrees. The title follows the required
+    format. Colors are colorblind-safe (blue for totals, orange for decreases, teal
+    for increases).'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and labels are readable at full size, though data labels on
+          bars could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible with appropriate spacing
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue/orange/teal instead of red/green)
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good proportions, though bottom margin could use the full space better
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has "Amount ($)" with currency indicator
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend is disabled which is fine for this chart, but grid lines are
+          quite subtle/minimal
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waterfall chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has connecting lines, distinct colors for positive/negative/totals,
+          data labels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for this single-series chart
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: "Quarterly Financial Breakdown · waterfall-basic
+          · highcharts · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive changes, negative changes, start total, and end total;
+          good variety of cost categories
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly financial breakdown is a classic waterfall use case with
+          realistic business categories
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values are realistic for a business scenario, though the ratio of
+          net income to revenue (~32%) is quite healthy
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → chart config → render → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png correctly, but also saves plot.html (acceptable
+          for interactive library)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts native waterfall type with isSum/isIntermediateSum,
+          custom formatters for data labels, but could leverage more Highcharts-specific
+          features like animations or drill-down
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/letsplot.yaml b/plots/waterfall-basic/metadata/letsplot.yaml
index cb84f75fca..7ed0a397df 100644
--- a/plots/waterfall-basic/metadata/letsplot.yaml
+++ b/plots/waterfall-basic/metadata/letsplot.yaml
@@ -26,3 +26,175 @@ review:
     values between labeled ticks
   - Could use more distinct colorblind-safe palette (e.g., blue/orange instead of
     green/red)
+  image_description: |-
+    The waterfall chart displays a quarterly financial breakdown from Starting Balance to Net Profit. The chart uses:
+    - **Blue bars** (#306998) for totals (Starting Balance at $50,000 and Net Profit at $61,000)
+    - **Green bars** (#22C55E) for increases (Product Sales +$35,000, Service Revenue +$18,000)
+    - **Red bars** (#EF4444) for decreases (Operating Costs -$22,000, Marketing -$8,000, Taxes -$12,000)
+    - **White value labels** centered on each bar with proper formatting (totals show $XX,XXX, changes show +/-XX,XXX)
+    - **Dashed connector lines** between bars showing cumulative flow
+    - **Title** at top: "waterfall-basic · letsplot · pyplots.ai"
+    - **Y-axis** labeled "Amount ($)" with dollar formatting from $0 to $100,000
+    - **X-axis** labels rotated 30 degrees for readability
+    - **Legend** on right showing Change Type (Increase, Decrease, Total)
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, axis labels, and value labels are all clearly readable at
+          full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text; rotated x-axis labels prevent overlap
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible; connector lines are dashed
+          and distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Green/red are classic waterfall colors but may have colorblind concerns;
+          however, the totals in blue provide differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; legend placement on right is appropriate
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Y-axis has descriptive label with units: "Amount ($)"'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No visible gridlines; while this creates a clean look, subtle horizontal
+          gridlines would aid reading values
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waterfall chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values properly positioned
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: connector lines, color differentiation,
+          start/end totals, running labels'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows Increase, Decrease, Total
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "waterfall-basic · letsplot · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows positive changes, negative changes, and totals - demonstrates
+          all waterfall features
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Financial breakdown is plausible but somewhat generic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Realistic business financial values
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Linear script with no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random elements)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot grammar with geom_rect and geom_segment, but no interactive
+          features leveraged
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/matplotlib.yaml b/plots/waterfall-basic/metadata/matplotlib.yaml
index 136c70ffd1..ed7cddbbdb 100644
--- a/plots/waterfall-basic/metadata/matplotlib.yaml
+++ b/plots/waterfall-basic/metadata/matplotlib.yaml
@@ -27,3 +27,176 @@ review:
   - No legend explaining the color coding (green=increase, red=decrease, blue=total)
   - X-axis label Category is generic - could be more descriptive like Financial Components
   - Red-green color combination may be challenging for colorblind users
+  image_description: 'The plot displays a waterfall chart showing a quarterly financial
+    breakdown from Revenue to Net Income. The chart features 7 bars: Revenue ($500,
+    green), Cost of Goods (-$200, red), Gross Profit ($300, blue subtotal), Operating
+    Expenses (-$150, red), Other Income ($25, green), Taxes (-$45, red), and Net Income
+    ($130, blue subtotal). Dashed gray connecting lines link the bars to show the
+    cumulative flow. Each bar has a white value label centered within it. The title
+    reads "waterfall-basic · matplotlib · pyplots.ai" at the top. X-axis labels are
+    rotated 15 degrees for readability. Y-axis shows "Amount ($)" and ranges from
+    0 to 500. A subtle y-axis grid with alpha=0.3 is present.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt, value labels
+          at 16pt bold - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, x-axis labels rotated appropriately
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths (0.6) are well-sized, connecting lines clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Green/red/blue color scheme is distinguishable but red-green combination
+          is not ideal for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas well with good margins, tight_layout applied
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has units "Amount ($)", but X-axis label "Category" is generic
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), but no legend explaining color meanings
+          (totals vs increases vs decreases)
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waterfall chart implementation
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Has connecting lines, color differentiation, start/end totals in
+          distinct color (blue), value labels on bars
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Y-axis shows all data from 0 to beyond max value
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for this chart type (colors self-explanatory with context)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "waterfall-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows positive changes (Revenue, Other Income), negative changes
+          (Cost of Goods, Operating Expenses, Taxes), and subtotals (Gross Profit,
+          Net Income)
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Quarterly financial breakdown is a perfect real-world use case for
+          waterfall charts
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for a financial statement (Revenue $500, costs
+          and expenses proportional)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple sequential structure: imports → data → calculations → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses deterministic hardcoded data, no randomness
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib API with ax methods
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Basic matplotlib bar chart implementation without leveraging any
+          distinctive features like annotations, patches, or advanced styling
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/plotly.yaml b/plots/waterfall-basic/metadata/plotly.yaml
index c7822baf87..f87dae3213 100644
--- a/plots/waterfall-basic/metadata/plotly.yaml
+++ b/plots/waterfall-basic/metadata/plotly.yaml
@@ -25,3 +25,165 @@ review:
   - Red/green color scheme could be improved for colorblind accessibility (consider
     using blue/orange palette)
   - Could add a simple legend explaining the color meanings for clarity
+  image_description: The plot displays a waterfall chart showing a quarterly financial
+    breakdown from Revenue ($500,000) to Net Income ($153,750). Blue bars represent
+    totals (Revenue and Net Income), red bars show decreases (Product Costs, Operating
+    Expenses, Marketing, Taxes), and a green bar shows an increase (Other Income).
+    Dotted gray connector lines link consecutive bars to emphasize the cumulative
+    flow. Value labels appear outside each bar with dollar formatting. The Y-axis
+    shows "Amount ($)" with proper currency tick formatting, and the X-axis shows
+    "Category". The title follows the required format centered at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 38
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: all text clearly readable at full size
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: no overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: bars well-sized and clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: red/green used but blue totals help distinguish
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent use of canvas space
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: descriptive with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: subtle grid, no legend but colors self-explanatory
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: correct waterfall chart type
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: categories and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: connecting lines, color coding, value labels all present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: all data visible
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, colors are self-explanatory
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: uses `waterfall-basic · plotly · pyplots.ai`
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: shows positive change, negative changes, and totals
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: plausible quarterly financial breakdown
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: realistic business values (~31% net margin)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: clean imports → data → plot → save
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: deterministic data but no explicit seed comment
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: only used imports
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: saves as plot.png
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: excellent use of Plotly's native `go.Waterfall` trace
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/plotnine.yaml b/plots/waterfall-basic/metadata/plotnine.yaml
index eb0fac7ce2..3459e1af7f 100644
--- a/plots/waterfall-basic/metadata/plotnine.yaml
+++ b/plots/waterfall-basic/metadata/plotnine.yaml
@@ -15,3 +15,4 @@ review:
   strengths: []
   weaknesses: []
   improvements: []
+  verdict: APPROVED
diff --git a/plots/waterfall-basic/metadata/pygal.yaml b/plots/waterfall-basic/metadata/pygal.yaml
index 6aac202109..10c3958bdd 100644
--- a/plots/waterfall-basic/metadata/pygal.yaml
+++ b/plots/waterfall-basic/metadata/pygal.yaml
@@ -26,3 +26,177 @@ review:
   - Empty spacer series appears in legend (should be hidden or legend entry suppressed)
   - Missing connecting lines between bars that spec suggests for emphasizing cumulative
     flow
+  image_description: The plot displays a waterfall chart showing a quarterly financial
+    breakdown from Q1 Revenue to Net Income. The chart uses a colorblind-friendly
+    palette with blue for totals (Q1 Revenue starting at $500K, Net Income ending
+    at $387K), teal for increases (Product Sales +$150K, Services +$80K, Other Income
+    +$25K), and orange for decreases (COGS -$180K, Operating Exp -$120K, Taxes -$68K).
+    Each bar is labeled with its value in white text. The x-axis labels include both
+    category names and running totals in parentheses (e.g., "Q1 Revenue ($500K)").
+    The y-axis is labeled "Amount ($K)" and the title correctly follows the format
+    "waterfall-basic · pygal · pyplots.ai". A legend at the bottom shows Total, Increase,
+    and Decrease categories.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; bar value labels are
+          visible but could be slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; x-axis labels are rotated to avoid
+          collision
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Bars are well-sized and clearly visible; floating waterfall effect
+          is achieved correctly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent colorblind-friendly palette using blue/teal/orange instead
+          of red/green
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good layout with appropriate margins; legend placement at bottom
+          works well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Y-axis has units "Amount ($K)", x-axis labeled "Category"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: Legend shows an empty series entry for the spacer; y-grid lines present
+          but subtle
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct waterfall chart type with floating bars
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories and values correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has start/end totals, positive/negative coloring, value labels; missing
+          connecting lines between bars as spec suggests
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axis range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend labels are correct but includes empty spacer series
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "waterfall-basic · pygal · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows positive changes, negative changes, and totals; good variety
+          of values
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: quarterly financial breakdown from
+          revenue to net income'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Values are realistic for business financials in $K
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean linear structure without functions or classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 3
+        passed: true
+        comment: Deterministic data (no random seed needed), but data is hardcoded
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only pygal and Style imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 4
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of StackedBar for waterfall effect, custom Style, value
+          printing, legend configuration
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/altair.yaml b/plots/windrose-basic/metadata/altair.yaml
index 014ad6106b..f9bbdb75f2 100644
--- a/plots/windrose-basic/metadata/altair.yaml
+++ b/plots/windrose-basic/metadata/altair.yaml
@@ -24,3 +24,178 @@ review:
   weaknesses:
   - Percentage labels (5%, 10%, etc.) could be slightly larger for better visibility
   - The Frequency (%) axis title text overlaps slightly with the 15%/20% labels area
+  image_description: The plot displays a wind rose chart with 8 directional sectors
+    (N, NE, E, SE, S, SW, W, NW) arranged in polar coordinates with North at the top.
+    Each sector contains stacked wedge segments representing wind speed ranges, colored
+    in a progression from Python blue (0-3 m/s) through yellow (3-6 m/s), gold (6-9
+    m/s), orange (9-12 m/s), to red-orange (>12 m/s). The chart shows a clear prevailing
+    westerly/southwesterly wind pattern, with the W, SW, and NW sectors having the
+    longest extensions (~25% frequency). The title "windrose-basic · altair · pyplots.ai"
+    appears at the top in large font. A legend labeled "Wind Speed" is positioned
+    on the right showing all 5 speed categories. Concentric circles mark frequency
+    percentages (5%, 10%, 15%, 20%, 25%) with a "Frequency (%)" label. Light gray
+    radial lines extend from the center to each direction label.
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title, direction labels, and legend are clearly readable; percentage
+          labels slightly small but acceptable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; percentage labels positioned well between
+          N and NW
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible and well-sized; stacked segments distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue-yellow-orange-red palette is colorblind-friendly; good contrast
+          between segments
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square layout appropriate for polar chart; good canvas utilization;
+          legend positioned well
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Frequency (%)" label present but no units on direction (N/S/E/W
+          are standard, so acceptable)'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid circles are subtle; legend well-placed; radial lines perhaps
+          slightly dense
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct wind rose polar histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction mapped to angle, speed to color, frequency to radius
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has 8 sectors, speed bins, stacking, legend; cool-to-warm colors
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All directions visible, frequency axis shows full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend shows all 5 speed ranges correctly
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Correct format used (spec-id · library · pyplots.ai)
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows prevailing wind pattern, multiple speed ranges, varied frequencies
+          across directions
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulated year of hourly wind data with Weibull distribution; westerly
+          prevailing pattern realistic
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Wind speeds 0-12+ m/s realistic; frequencies as percentages sensible
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only altair, numpy, pandas used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: false
+        comment: Current API usage
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves as plot.png and plot.html (correct)
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Creative use of mark_line with filled=True for polygon wedges, layered
+          composition, tooltips, declarative encoding
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/bokeh.yaml b/plots/windrose-basic/metadata/bokeh.yaml
index 7e80ab21ca..0ec802b0c0 100644
--- a/plots/windrose-basic/metadata/bokeh.yaml
+++ b/plots/windrose-basic/metadata/bokeh.yaml
@@ -25,3 +25,185 @@ review:
   - Legend text appears small relative to the plot size (18pt vs 22pt direction labels)
   - The percentage labels on the concentric circles could be larger for better readability
   - Missing explicit Frequency (%) label for the radial axis
+  image_description: The wind rose chart displays a polar histogram with 8 directional
+    spokes (N, NE, E, SE, S, SW, W, NW). North is correctly positioned at the top.
+    The visualization shows stacked wedge segments colored from cool blue (#306998,
+    0-3 m/s) through teal, yellow, orange, to red/coral (>12 m/s) representing increasing
+    wind speeds. The dominant wind directions are clearly W, NW, and SW, matching
+    the coastal weather station scenario. Five concentric reference circles are visible
+    (5%, 10%, 15%, 20%, 25%) with percentage labels. The legend on the right shows
+    all 5 speed categories. The title "windrose-basic · bokeh · pyplots.ai" is at
+    the top, and "Wind Speed (m/s)" appears at the bottom. The plot uses a square
+    1:1 aspect ratio appropriate for polar visualizations.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title at 32pt, direction labels at 22pt, and reference labels at
+          16pt are all clearly readable. Larger percentage labels would be slightly
+          better.
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No text overlapping; all direction labels and percentage markers
+          are well-spaced.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedge segments are clearly visible with good separation. The innermost
+          blue segments are a bit small for the smallest frequencies.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Color progression from blue→teal→yellow→orange→red is colorblind-friendly
+          (not relying on red-green distinction alone).
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills the canvas well with balanced margins; legend properly
+          positioned on the right.
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: The subtitle says "Wind Speed (m/s)" but there's no explicit axis
+          label for direction or frequency.
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Concentric circles are subtle (alpha 0.5), legend is well-placed.
+          Grid lines could be slightly more subtle.
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar stacked histogram (wind rose) type.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction correctly mapped to angular position, speed to color, frequency
+          to radial extent.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 8 direction bins, stacked speed segments,
+          legend, percentage labels.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All directions and speed ranges visible.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly labels all 5 speed ranges.
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format.
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows varied wind patterns across directions with clear dominant
+          directions (W, NW, SW). Could show slightly more variation in speed distribution
+          across directions.
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Coastal weather station with prevailing westerly winds is a realistic
+          and well-documented meteorological scenario.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 0-12+ m/s are realistic. The Weibull distribution is
+          the standard for wind speed modeling. Percentages reaching ~25% are reasonable.
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script structure: imports → data → plot → save. No functions
+          or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42).
+      - id: CQ-03
+        name: Clean Imports
+        score: 1
+        max: 2
+        passed: true
+        comment: All imports used, but output_file and save could arguably be omitted
+          for PNG-only output (though HTML is a valid Bokeh feature).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Bokeh API.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ColumnDataSource, patches with nested arrays, custom Legend
+          with LegendItem. Could leverage more Bokeh-specific features like HoverTool
+          for interactivity, but this is a static PNG output.
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/highcharts.yaml b/plots/windrose-basic/metadata/highcharts.yaml
index 733123ed78..9c3dd8efd9 100644
--- a/plots/windrose-basic/metadata/highcharts.yaml
+++ b/plots/windrose-basic/metadata/highcharts.yaml
@@ -24,3 +24,169 @@ review:
     removing or repositioning
   - Uses manual JSON config instead of highcharts-core Python library (acceptable
     but less idiomatic)
+  image_description: 'The plot displays a polar wind rose chart with 8 compass direction
+    sectors (N, NE, E, SE, S, SW, W, NW). The chart uses stacked bars radiating from
+    the center, with 5 color-coded speed ranges: dark blue (0-3 m/s), teal (3-6 m/s),
+    yellow (6-9 m/s), orange (9-12 m/s), and red (>12 m/s). The dominant wind directions
+    are clearly W, SW, and S, showing a prevailing westerly pattern. The title "windrose-basic
+    · highcharts · pyplots.ai" is at the top with subtitle "Annual Wind Pattern Distribution".
+    A legend on the right shows "Wind Speed" categories. Radial axis shows frequency
+    percentages from 0% to 24%. Grid lines are subtle gray circles.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title, direction labels, and axis percentages are all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Stacked bars are well-sized and distinguishable
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe progression from cool blue to warm red/orange
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Good but plot is slightly offset left with large right margin for
+          legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Frequency (%) label present with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: The "Frequency (%)" y-axis title overlaps with the percentage labels
+          on the radial axis
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar stacked histogram (wind rose)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction on angular axis, frequency on radial axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 8 direction sectors, 5 speed bins, stacked colors
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axis shows all data up to ~24%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows speed ranges with colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows varied wind patterns: dominant W/SW, secondary directions,
+          all speed ranges'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Annual hourly wind data (8760 obs) with realistic Weibull speed distribution
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequencies 0-24% are realistic for meteorological data
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear structure: imports → data → config → render'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 0
+        max: 2
+        passed: false
+        comment: json import is used but numpy and other imports are all needed
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Creates plot.png correctly
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts polar chart with stacking, but manually constructs
+          config instead of using highcharts-core Python library
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/letsplot.yaml b/plots/windrose-basic/metadata/letsplot.yaml
index 32c1de9386..ed00428429 100644
--- a/plots/windrose-basic/metadata/letsplot.yaml
+++ b/plots/windrose-basic/metadata/letsplot.yaml
@@ -26,3 +26,178 @@ review:
     could be enhanced)
   - Radial axis label Frequency (%) appears on left side which is unconventional for
     polar plots
+  image_description: The plot displays a polar wind rose chart with North (N) at the
+    top. The chart shows 16 direction sectors as stacked bar segments radiating from
+    the center. The dominant wind directions are clearly NW (Northwest) and W (West),
+    with bars extending to approximately 14% and 8% frequency respectively. Colors
+    progress from cool blues (dark blue for 0-3 m/s, medium blue for 3-6 m/s, light
+    blue for 6-9 m/s) to warm colors (yellow for 9-12 m/s, orange for 12-15 m/s, red
+    for 15+ m/s). The 8 cardinal/ordinal direction labels (N, NE, E, SE, S, SW, W,
+    NW) are positioned around the perimeter. The y-axis shows "Frequency (%)" ranging
+    from 0 to 16. A legend titled "Wind Speed" appears on the right side showing all
+    6 speed categories. The title "windrose-basic · letsplot · pyplots.ai" is centered
+    at the top.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title is large and clear (24pt), axis labels readable, legend text
+          appropriately sized
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, direction labels well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar widths and stacking clearly show data density, alpha=0.9 provides
+          good visibility
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Cool-to-warm progression is colorblind-safe, good contrast between
+          categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square format appropriate for polar plot, slight asymmetry due to
+          data distribution but good canvas utilization
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Y-axis has "Frequency (%)" with units, but radial nature makes traditional
+          axis labels less applicable
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (good), legend is clear, but legend positioning could
+          be slightly closer to plot
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct wind rose polar stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction to angular position, speed to color, frequency to bar length
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 16 sectors, 6 speed bins, stacked segments, legend present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, radial axis shows full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 speed categories with units
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format "windrose-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows prevailing westerlies with secondary NE component, multiple
+          speed categories visible, variation across directions
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Simulates 1 year of hourly wind data with Weibull distribution, realistic
+          meteorological pattern
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Wind speeds 0-25 m/s realistic, frequencies as percentages appropriate
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → aggregation → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (numpy, pandas, lets_plot)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current lets-plot API used
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 0
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 0
+        max: 5
+        passed: false
+        comment: Uses standard ggplot grammar available in most ggplot-like libraries,
+          no distinctive lets-plot features like tooltips or interactive elements
+          demonstrated
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/matplotlib.yaml b/plots/windrose-basic/metadata/matplotlib.yaml
index aa41c65009..1d1bf59c07 100644
--- a/plots/windrose-basic/metadata/matplotlib.yaml
+++ b/plots/windrose-basic/metadata/matplotlib.yaml
@@ -27,3 +27,183 @@ review:
     corner rather than being positioned closer to the plot area
   - The 15+ m/s (red) speed category is present in legend but barely visible in data,
     making the legend somewhat misleading about what speeds are actually represented
+  image_description: The wind rose plot displays a polar histogram showing wind direction
+    and speed distribution. North is positioned at the top with 16 compass direction
+    labels (N, NNE, NE, ENE, E, ESE, SE, SSE, S, SSW, SW, WSW, W, WNW, NW, NNW) arranged
+    around the perimeter in bold black text. The radial axis shows frequency percentages
+    from 0% to 18% with concentric dashed circles. Each spoke represents a direction
+    sector with stacked colored bars indicating wind speed ranges. The color scheme
+    progresses from dark blue (0-3 m/s) through lighter blues (3-6, 6-9 m/s) to yellow
+    (9-12 m/s) and orange (12-15 m/s), with red (15+ m/s) shown in the legend but
+    barely visible in the data. The dominant winds are from the SW-W-WNW-NW sectors,
+    showing a clear prevailing westerly pattern. A legend box in the lower-left corner
+    shows "Wind Speed" with all six speed ranges. The title "windrose-basic · matplotlib
+    · pyplots.ai" appears at the top in bold black text.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, direction labels at 16pt bold, percentage labels at
+          14pt, all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, direction labels well-spaced around perimeter
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments clearly visible, white edge separation between stacked
+          segments, appropriate bar width
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Cool-to-warm progression is colorblind-friendly, good contrast between
+          speed categories
+      - id: VQ-05
+        name: Layout Balance
+        score: 3
+        max: 5
+        passed: true
+        comment: Square format appropriate for polar plot, but legend positioned outside
+          plot area creates some imbalance; plot utilizes canvas well
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Radial axis shows frequency in percentage format, direction labels
+          are standard compass notation
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Grid is subtle (alpha=0.3) which is good, but legend placement at
+          bbox_to_anchor=(-0.15, -0.15) causes it to float isolated in corner rather
+          than being positioned closer to the plot area
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct polar stacked histogram (wind rose)
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction mapped to angular position, speed to color, frequency to
+          radial extent
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 16 direction sectors, 6 speed bins with stacked segments, North at
+          top, clockwise direction
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 6 speed ranges with units (m/s)
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "windrose-basic · matplotlib · pyplots.ai"
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows prevailing winds (SW), secondary pattern (NW), variable directions,
+          multiple speed categories with realistic distribution
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Simulates 8760 hourly readings (one year), uses Weibull distribution
+          for wind speeds which is meteorologically appropriate
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 0-25 m/s are reasonable; however, the 15+ m/s category
+          is barely represented in the visualization despite being in the legend
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data generation → binning → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at the beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current matplotlib APIs
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 2
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 2
+        max: 5
+        passed: true
+        comment: Uses matplotlib's polar projection correctly with set_theta_zero_location
+          and set_theta_direction, but this is basic polar chart functionality rather
+          than advanced matplotlib features
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/plotly.yaml b/plots/windrose-basic/metadata/plotly.yaml
index 142ef64d16..e6172717c3 100644
--- a/plots/windrose-basic/metadata/plotly.yaml
+++ b/plots/windrose-basic/metadata/plotly.yaml
@@ -25,3 +25,183 @@ review:
     represent
   - Color palette progression from dark blue to light blue could have slightly more
     contrast for the lowest speed bins
+  image_description: The plot displays a wind rose chart on a polar coordinate system
+    with 8 compass directions (N, NE, E, SE, S, SW, W, NW) arranged clockwise with
+    North at the top. Each direction shows stacked bars representing wind speed frequency
+    distributions. The color scheme progresses from dark navy blue (0-3 m/s) through
+    light blue (3-6 m/s), yellow (6-9 m/s), orange (9-12 m/s) to coral red (>12 m/s).
+    The dominant wind directions are SW and W, showing the highest frequencies (~22-25%).
+    The radial axis displays percentage values (5%, 10%, 15%, 20%) at 45-degree angle.
+    A legend titled "Wind Speed" on the right side clearly identifies the 5 speed
+    categories. The title "windrose-basic · plotly · pyplots.ai" is centered at the
+    top.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 32pt, direction labels at 22pt, radial ticks at 18pt, legend
+          at 18-20pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, all labels clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Bar segments are well-sized and clearly visible, stacking is clear
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good progression from cool to warm colors; blue-yellow-orange-red
+          is generally colorblind-friendly, though blue/navy distinction could be
+          slightly clearer
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas, polar plot is well-centered, legend placement
+          is appropriate; slight gap between plot and legend
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Polar chart - no traditional axis labels, but radial axis has % suffix
+          which is informative. Direction labels serve as angular axis. No explicit
+          units label for "Frequency"
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle circular grid lines, legend is well-placed with border and
+          appropriate sizing
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct wind rose / polar stacked bar chart
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction mapped to angular axis, frequency to radial, speed to color/stacking
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 8 direction sectors, 5 speed bins, stacked representation, legend
+          with speed ranges
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, radial axis extends appropriately to ~22%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 speed categories with accurate labels
+          and colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "windrose-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variation in direction frequency (SW/W dominant), different
+          speed distributions per direction, realistic Weibull speed distribution;
+          could show slightly more variation in speed composition across directions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent: simulated hourly data for one year (8760 obs), prevailing
+          westerly winds typical of mid-latitude locations, Weibull distribution for
+          wind speeds is meteorologically accurate'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 0-12+ m/s are realistic, percentages are reasonable;
+          very small frequencies for N/NE directions could have slightly more representation
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → binning → figure → save structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at beginning
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects used, both necessary
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API (go.Barpolar, write_image, write_html)
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Barpolar which is appropriate for wind rose, polar layout
+          configuration, and generates both PNG and HTML for interactivity. Could
+          further leverage Plotly's hover customization for speed/direction details.
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/plotnine.yaml b/plots/windrose-basic/metadata/plotnine.yaml
index 4e0175962f..5374015d34 100644
--- a/plots/windrose-basic/metadata/plotnine.yaml
+++ b/plots/windrose-basic/metadata/plotnine.yaml
@@ -25,3 +25,198 @@ review:
     and may be confusing
   - Small innermost wedge segments are harder to visually distinguish due to their
     size
+  image_description: The plot displays a wind rose chart with 8 directional spokes
+    (N, NE, E, SE, S, SW, W, NW) arranged in a polar layout with North at the top.
+    Each spoke contains stacked colored wedge segments representing wind speed bins.
+    The colors progress from dark blue (0-5 m/s, calm winds) through light blue (5-10
+    m/s), green (10-15 m/s), yellow (15-20 m/s), to orange (20+ m/s, strong winds).
+    The SW direction shows the longest spokes, indicating prevailing winds from that
+    direction. Concentric dashed gridlines at 5%, 10%, and 15% frequency are visible,
+    with percentage labels positioned near the N spoke. The title "windrose-basic
+    · plotnine · pyplots.ai" appears at the top. A legend on the right side labeled
+    "Wind Speed" shows all 5 speed categories with their colors and units (m/s). Direction
+    labels (N, NE, E, etc.) are positioned outside the chart in bold black text. The
+    background is clean white.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: 'Title is clear at ~24pt, direction labels are bold and readable,
+          frequency labels are visible. Legend text is adequately sized. Minor: frequency
+          axis label could be slightly larger.'
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements. Direction labels, frequency percentages,
+          and legend are all well-separated.
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wedges are clearly visible with good alpha (0.9), white borders separate
+          segments well. Small gaps between adjacent direction wedges aid readability.
+          Minor deduction for very small innermost wedge segments being slightly harder
+          to distinguish.
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Color progression from blue→green→yellow→orange is colorblind-friendly
+          (avoids pure red-green differentiation). Sequential color scheme is intuitive.
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Plot is well-centered, good use of canvas space. Legend positioned
+          appropriately on the right. Minor: slight asymmetry in axis limits creates
+          small imbalance.'
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: '"Frequency (%)" label is present and italicized. Wind speed legend
+          includes units (m/s). However, this is a polar plot without traditional
+          axes - partial credit.'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Dashed gridlines at appropriate alpha (0.7), legend is well-placed
+          with border and clear labels.
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct wind rose chart as polar stacked histogram showing direction
+          and speed distribution.
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction correctly mapped to angular position, frequency to radial
+          extent, speed to stacked color segments.
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: 8 direction bins, 5 speed bins, stacked
+          segments, cool-to-warm colors, legend with speed ranges, radial frequency
+          axis.'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, gridlines extend to 15% covering the data range
+          appropriately.
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows all 5 speed categories with accurate labels
+          and m/s units.
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format "windrose-basic · plotnine · pyplots.ai" but
+          uses regular dot · character (acceptable).
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: 'Shows prevailing SW winds, variation across directions, all speed
+          bins represented. Good demonstration of wind patterns. Minor: could show
+          more dramatic variation in highest speed bins.'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Coastal weather station scenario is realistic and well-documented
+          in code comments. Prevailing SW winds are typical for coastal locations.
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: 'Wind speed ranges (0-5, 5-10, 10-15, 15-20, 20+ m/s) are realistic.
+          Frequency percentages are plausible. Minor: total frequencies across all
+          bins should ideally sum to ~100%.'
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → polygon construction → plot → save.
+          No functions or classes.'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42), though data is actually deterministic (hardcoded
+          frequencies).
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used (numpy, pandas, math, plotnine components).
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: true
+        comment: Uses older plotnine parameter naming but functional.
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with dpi=300.
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses plotnine grammar of graphics (ggplot + geom_polygon + geom_line
+          + geom_text + theme). Manual polygon construction is necessary since plotnine
+          lacks native coord_polar. Shows good understanding of the library but the
+          workaround is complex.
+  verdict: APPROVED
diff --git a/plots/windrose-basic/metadata/seaborn.yaml b/plots/windrose-basic/metadata/seaborn.yaml
index 8cb071ad4d..1c4e757e20 100644
--- a/plots/windrose-basic/metadata/seaborn.yaml
+++ b/plots/windrose-basic/metadata/seaborn.yaml
@@ -24,3 +24,185 @@ review:
   - Missing explicit radial axis label (should indicate Frequency or similar)
   - Seaborn is used primarily for styling rather than seaborn plotting functions -
     actual bars drawn with matplotlib polar bar plot
+  image_description: The plot displays a polar wind rose chart with 16 directional
+    sectors. The title "windrose-basic · seaborn · pyplots.ai" is prominently displayed
+    at the top in bold black text. North (N) is correctly positioned at the top, with
+    compass directions (NE, E, SE, S, SW, W, NW) labeled around the perimeter in bold
+    black text. The chart shows stacked colored bars representing wind speed categories,
+    progressing from pale yellow (0-3 m/s) through orange shades (3-6, 6-10 m/s) to
+    red/crimson (10-15, 15+ m/s). The dominant wind direction is clearly from the
+    SW, with a prominent ~30% frequency spike showing strong winds (many red/orange
+    segments). A secondary NE pattern (~10% frequency) is visible with moderate speeds.
+    The radial axis shows percentage labels (0%, 5%, 10%, 15%, 20%, 25%, 30%) with
+    subtle gray grid lines. A legend titled "Wind Speed" is positioned in the lower
+    right corner with a white background and gray border, showing all five speed categories
+    with their colors.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt bold, direction labels at 18pt bold, radial labels
+          at 14pt, all perfectly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, clean separation between all labels
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Stacked bars are clearly visible with good alpha (0.9), white edges
+          provide separation; minor deduction for some thin segments in less frequent
+          directions
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: YlOrRd palette progresses from yellow to red, colorblind-friendly
+          sequential palette
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Square 12x12 figure works well for polar plot, but some empty space
+          in corners; legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: true
+        comment: No explicit axis labels (radial axis shows only percentages without
+          "Frequency" label)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Grid subtle at alpha 0.3, legend well-placed with good styling
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct wind rose chart as polar stacked histogram
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Direction mapped to theta, speed to stacked segments, frequency to
+          radial extent
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 16 direction bins, 5 speed ranges, stacked colored segments, legend
+          present
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full 360° coverage, radial axis properly scaled to max ~30%
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend accurately shows speed ranges with matching colors
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Exact format "windrose-basic · seaborn · pyplots.ai" with middle
+          dots
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows prevailing SW winds, secondary NE pattern, varying speeds by
+          direction; good distribution but could show more calm conditions
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 8760 observations (one year hourly) with realistic Weibull-distributed
+          speeds and dominant SW/NE patterns typical of many locations
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Wind speeds 0-25 m/s realistic, though max 25 m/s is on the high
+          end for typical hourly data
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear script: imports → data → bins → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set at start
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib, numpy, seaborn used (all necessary)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 0
+        max: 1
+        passed: false
+        comment: Uses `strict=True` in zip which is fine, but line 65 has overly complex
+          logic
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses sns.set_style("whitegrid"), sns.set_context("talk"), sns.color_palette("YlOrRd");
+          however, the actual plotting is done with matplotlib's ax.bar on polar projection,
+          not seaborn plotting functions
+  verdict: APPROVED
diff --git a/plots/wireframe-3d-basic/metadata/bokeh.yaml b/plots/wireframe-3d-basic/metadata/bokeh.yaml
index 32a82cc881..9a0ecfe40e 100644
--- a/plots/wireframe-3d-basic/metadata/bokeh.yaml
+++ b/plots/wireframe-3d-basic/metadata/bokeh.yaml
@@ -23,3 +23,168 @@ review:
   - Formula annotation (z = sin(sqrt(x² + y²))) not visible in the rendered image
   - Axis tick marks not present as specified in requirements
   - X and Y axis labels positioned at edges are relatively small compared to wireframe
+  image_description: The plot displays a 3D wireframe surface showing the ripple function
+    z = sin(sqrt(x² + y²)). The wireframe is rendered in Python Blue (#306998) with
+    a 30x30 grid creating a smooth, see-through mesh. The surface shows a central
+    peak with concentric wave-like ripples extending outward, viewed from a 3D perspective
+    (elevation 30°, azimuth 45°). Three custom axis lines (X, Y, Z) are positioned
+    at the projected origin with arrows and bold labels. The title "wireframe-3d-basic
+    · bokeh · pyplots.ai" appears at the top left. The background is a light gray
+    (#f8f9fa) providing good contrast with the blue wireframe.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels clearly visible, though axis labels could be
+          slightly larger
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wireframe lines well-sized with good alpha transparency
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color scheme, accessible blue
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Wireframe fills canvas well with balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: X, Y, Z labels present but no units (mathematical function)
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: false
+        comment: No grid needed for 3D projection, no legend needed
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct wireframe 3D visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y grid correctly mapped to Z values
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Grid lines in both directions present; custom axes shown but missing
+          tick marks
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within canvas
+      - id: SC-05
+        name: Legend Accuracy
+        score: 1
+        max: 2
+        passed: true
+        comment: Formula annotation present but could be more visible
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: wireframe-3d-basic · bokeh · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows ripple pattern clearly, demonstrates topology
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Classic mathematical ripple function
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: 30x30 grid with -4 to 4 range appropriate
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Sequential structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Current Bokeh API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Formula label not visible in rendered image
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses multi_line, Label, patch for arrows; creative 2D projection
+          approach for 3D visualization
+  verdict: APPROVED
diff --git a/plots/wireframe-3d-basic/metadata/highcharts.yaml b/plots/wireframe-3d-basic/metadata/highcharts.yaml
index c2ce1736ff..791b0337f4 100644
--- a/plots/wireframe-3d-basic/metadata/highcharts.yaml
+++ b/plots/wireframe-3d-basic/metadata/highcharts.yaml
@@ -24,3 +24,175 @@ review:
   - Z-axis tick labels and axis title overlap slightly in the lower right corner
   - Grid/legend score reduced as no visible gridlines on the 3D frame floor for spatial
     reference
+  image_description: 'The plot shows a 3D wireframe visualization of the ripple function
+    z = sin(√(x² + y²)). The wireframe mesh is rendered in two shades of blue (#306998
+    for X-direction lines and #1e4c73 for Y-direction lines), creating a clear grid
+    pattern. The surface shows the characteristic concentric ripple pattern emanating
+    from the origin. The plot has three labeled axes: X Position (units) ranging from
+    -5 to 5, Y Position (units) ranging from -5 to 5, and Z Amplitude ranging from
+    -1.2 to 1.2. The title "wireframe-3d-basic · highcharts · pyplots.ai" is displayed
+    at the top with a subtitle showing the mathematical formula. The 3D perspective
+    provides a good viewing angle showing the surface topology. A subtle blue-tinted
+    3D frame provides depth context.'
+  criteria_checklist:
+    visual_quality:
+      score: 35
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable; tick labels on Z-axis
+          slightly overlap with axis title
+      - id: VQ-02
+        name: No Overlap
+        score: 7
+        max: 8
+        passed: true
+        comment: Minor overlap on Z-axis where tick labels meet the axis title
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Wireframe lines are clearly visible with appropriate line width;
+          two-tone coloring enhances depth perception
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Colorblind-safe blue palette used throughout
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization; 3D surface fills most of the plot area,
+          slight asymmetry due to 3D perspective
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: All three axes have descriptive labels with units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: No legend needed; subtle 3D frame gridlines present
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D wireframe visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y, Z correctly mapped to position coordinates
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines in both X and Y directions, 3D perspective, all axes labeled
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible within axes ranges
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed for wireframe
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: wireframe-3d-basic · highcharts · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows ripple function well with peaks and troughs; could show more
+          varied surface complexity
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Mathematical function visualization is appropriate; z = sin(√(x²
+          + y²)) is a classic demo function
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Sensible axis ranges for the mathematical function
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports → data → plot → save structure
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set (though not strictly needed for deterministic
+          function)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Using current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 0
+        passed: true
+        comment: Saves as plot.png
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Highcharts 3D scatter with line connections to simulate wireframe;
+          creative workaround for a library that doesn't natively support wireframe
+          plots
+  verdict: APPROVED
diff --git a/plots/wireframe-3d-basic/metadata/matplotlib.yaml b/plots/wireframe-3d-basic/metadata/matplotlib.yaml
index 89f6be34d9..f891a029de 100644
--- a/plots/wireframe-3d-basic/metadata/matplotlib.yaml
+++ b/plots/wireframe-3d-basic/metadata/matplotlib.yaml
@@ -22,3 +22,168 @@ review:
   - Axis labels use generic X, Y, Z instead of more descriptive labels
   - Wireframe linewidth at 1.5 is slightly thin for 4800x2700 output; 2-3 would be
     more visible
+  image_description: The plot displays a 3D wireframe visualization of the ripple
+    function z = sin(sqrt(x² + y²)). The wireframe is rendered in Python Blue (#306998)
+    with consistent linewidth creating a mesh grid structure. The surface shows the
+    characteristic concentric ripple pattern emanating from the center (0,0), with
+    peaks at approximately z=1 and troughs at z=-1. The viewing angle is set at elevation
+    30° and azimuth 45°, providing a good perspective of the 3D structure. All three
+    axes (X, Y, Z) are labeled with clear tick marks ranging from -6 to 6 for X and
+    Y, and -1.0 to 1.0 for Z. The title "wireframe-3d-basic · matplotlib · pyplots.ai"
+    appears at the top. The grid panes have subtle gray edges and a dashed grid pattern
+    with low alpha. The overall layout is well-balanced with the 3D plot utilizing
+    good canvas space.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, ticks at 14pt - all clearly readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text or elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wireframe lines visible with good alpha, linewidth could be slightly
+          thicker (1.5 vs recommended 2-3)
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single color (#306998 Python Blue), no colorblind issues
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: 3D plot fills canvas well, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 0
+        max: 2
+        passed: false
+        comment: Uses generic "X", "Y", "Z" without descriptive context or units
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Grid is subtle (alpha=0.3), no legend needed for single-color wireframe
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D wireframe visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y grid correctly mapped to Z height values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines in both X and Y, consistent color, 3D projection with
+          viewing angle
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible, axes show full range
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for single-series wireframe
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai"
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: Shows peaks, troughs, and full ripple pattern demonstrating wireframe
+          capabilities
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 7
+        passed: true
+        comment: Uses standard mathematical ripple function, good for demonstrating
+          3D surfaces but abstract
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Values sensible for mathematical function (-6 to 6 range, -1 to 1
+          output)
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) though not strictly needed for deterministic
+          function
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only matplotlib.pyplot and numpy imported
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: All APIs current
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as 'plot.png'
+    library_features:
+      score: 3
+      max: 5
+      items: []
+  verdict: APPROVED
diff --git a/plots/wireframe-3d-basic/metadata/plotly.yaml b/plots/wireframe-3d-basic/metadata/plotly.yaml
index 4de4c1ab57..6075d84601 100644
--- a/plots/wireframe-3d-basic/metadata/plotly.yaml
+++ b/plots/wireframe-3d-basic/metadata/plotly.yaml
@@ -28,3 +28,184 @@ review:
     lines
   - Single uniform color misses opportunity for height-based coloring mentioned as
     optional in spec
+  image_description: 'The plot displays a beautiful 3D wireframe visualization of
+    the mathematical ripple function z = sin(√(x² + y²)). The wireframe uses a consistent
+    Python Blue (#306998) color for all lines, creating an elegant mesh structure
+    with lines running in both x and y directions. The surface shows the characteristic
+    concentric wave pattern radiating from the center, with a peak at z=1 in the middle
+    and oscillating values outward. The title "wireframe-3d-basic · plotly · pyplots.ai"
+    is centered at the top. All three axes are clearly labeled: "X Axis" and "Y Axis"
+    for the horizontal planes, and "Z = sin(√(x² + y²))" for the vertical axis with
+    proper mathematical notation. The viewing angle (~30° elevation, ~45° azimuth)
+    provides excellent visibility of the 3D structure. The light gray background planes
+    with subtle gridlines provide good spatial reference without distraction.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: Title and axis labels are clearly readable at full size; font sizes
+          are appropriate (32pt title, 22pt axis labels, 16pt ticks)
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements; all labels and tick marks are well-positioned
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Wireframe lines are clearly visible with good width (3px); the mesh
+          density (30x30) provides clear structure without overcrowding
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Single Python Blue color scheme is colorblind-safe and provides excellent
+          contrast against the light background
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good use of canvas space; 3D plot is well-centered with appropriate
+          margins; slight deduction for some empty space on sides
+      - id: VQ-06
+        name: Axis Labels
+        score: 1
+        max: 2
+        passed: true
+        comment: Labels are descriptive ("X Axis", "Y Axis", "Z = sin(√(x² + y²))")
+          but X and Y lack units or context
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Subtle grid lines (rgba 0.15 alpha) on background planes; no legend
+          needed for single-color wireframe
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct 3D wireframe plot showing surface as mesh of lines
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: X, Y grid values correctly mapped to Z height via ripple function
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid lines in both x and y directions, consistent line color, proper
+          3D perspective, all axes labeled
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full data range visible; axes show complete -5 to 5 range for X/Y
+          and -1 to 1 for Z
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; N/A for single-color wireframe
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correctly uses "{spec-id} · {library} · pyplots.ai" format
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows wireframe structure well with both positive and negative Z
+          values; demonstrates see-through nature of wireframe; could show more dramatic
+          terrain variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 6
+        max: 7
+        passed: true
+        comment: Ripple function z = sin(√(x² + y²)) is a classic mathematical example
+          for 3D visualization; appropriate for educational context
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Grid size 30x30 is within recommended range; X/Y range of -5 to 5
+          is sensible
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple linear flow: imports → data → plot → save; no functions or
+          classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data using np.linspace and mathematical function (no
+          random values)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only numpy and plotly.graph_objects imported, both used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions (4800x2700 via scale=3)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Correctly uses go.Scatter3d for 3D line rendering; generates HTML
+          for interactivity; could leverage more Plotly-specific features like hover
+          data or color gradients
+  verdict: APPROVED
diff --git a/plots/wordcloud-basic/metadata/altair.yaml b/plots/wordcloud-basic/metadata/altair.yaml
index ac970635e4..338d0624be 100644
--- a/plots/wordcloud-basic/metadata/altair.yaml
+++ b/plots/wordcloud-basic/metadata/altair.yaml
@@ -23,3 +23,174 @@ review:
     light colors on white background
   - Word distribution could be more compact - there is empty space at the bottom of
     the canvas
+  image_description: The plot displays a word cloud with 25 tech industry buzzwords.
+    The largest words "Python", "Data", "Machine", "Learning", "Analytics", and "Cloud"
+    dominate the center, with "Python" being the most prominent in dark blue (#306998).
+    Words are colored using a palette of dark blue (#306998), yellow (#FFD43B), light
+    blue (#4B8BBE), gray (#646464), and variations. Font sizes range from large (Python,
+    Data, Machine) to small (GraphQL, AWS, Terraform). The words are arranged in a
+    roughly circular/spiral pattern filling the center of the canvas, with the title
+    "wordcloud-basic · altair · pyplots.ai" at the top. No axes are present as expected
+    for a word cloud. The layout is clean with no overlapping text.
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 10
+        passed: true
+        comment: Most text is readable, but some smaller yellow words (GraphQL, AWS,
+          Terraform) have lower contrast on white
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all words clearly separated
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Word sizes are well-differentiated, but smallest words could be slightly
+          larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Good color palette with blues, yellows, grays - distinguishable even
+          for colorblind users
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good central filling, but some empty space at bottom of canvas
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for word cloud - no axes needed, full points
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for word cloud - no grid/legend needed, full points
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct word cloud visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Word frequency correctly mapped to font size
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: 'All spec features present: word display, size by frequency, decorative
+          colors'
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All 25 words visible and displayed
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for word cloud - no legend needed
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Uses correct format: wordcloud-basic · altair · pyplots.ai'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good range of frequencies with clear size differentiation
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech industry buzzwords are a realistic and comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency values (22-100) are plausible, good range
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Simple sequential structure: imports → data → positioning → plot
+          → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (altair, numpy, pandas)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Altair API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png but also plot.html (minor extra file)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses Altair's mark_text, encoding system, and declarative approach.
+          However, word clouds are not Altair's strength - creative solution but not
+          showcasing Altair's best features like interactive selections or layered
+          charts
+  verdict: APPROVED
diff --git a/plots/wordcloud-basic/metadata/highcharts.yaml b/plots/wordcloud-basic/metadata/highcharts.yaml
index 8e7216d44b..df9d424bf8 100644
--- a/plots/wordcloud-basic/metadata/highcharts.yaml
+++ b/plots/wordcloud-basic/metadata/highcharts.yaml
@@ -29,3 +29,173 @@ review:
   - Word cloud appears slightly top-heavy in the layout distribution
   - Could benefit from wider weight range for more dramatic size differences between
     common and rare terms
+  image_description: 'The word cloud displays 35 technology-related terms from a developer
+    survey on a white background. The largest word is "python" (in blue, #306998),
+    followed by "javascript" (green), "data" (teal), "cloud" (orange), "api" (purple),
+    and "machine learning" (yellow-gold). Words are arranged at various angles (0°
+    to ~60°) in an archimedean spiral pattern, creating a visually appealing cluster.
+    The color palette includes blue, yellow, purple, cyan, brown, green, and pink
+    - all colorblind-safe choices. The title "Developer Survey · wordcloud-basic ·
+    highcharts · pyplots.ai" appears at the top in bold black text. Smaller terms
+    like "documentation", "architecture", "refactoring", and "opensource" are visible
+    but proportionally smaller. The word cloud fills the center of the canvas well
+    with balanced whitespace.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All words are clearly readable at full size, title is large and bold
+          (64px), word sizes scale appropriately from 24px to 120px
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all words are distinct and readable
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Word sizes are well-adapted to frequencies, good size variation shows
+          hierarchy clearly
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette (blue, yellow, purple, cyan, brown,
+          green, pink) - no red-green conflicts
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good canvas utilization, word cloud fills center well, though slightly
+          top-heavy
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly disabled (N/A for word clouds), no distracting elements
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct word cloud visualization using Highcharts wordcloud module
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Words mapped to name, frequencies to weight correctly
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Word size by frequency, multiple words (35), proper preprocessing
+          implied
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Full range of weights displayed (12-150)
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend appropriately disabled for word cloud
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "Developer Survey · wordcloud-basic · highcharts
+          · pyplots.ai"'
+    data_quality:
+      score: 17
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 8
+        passed: true
+        comment: Shows good variation in word sizes and 35 terms, but could include
+          more variety in rotation angles or show multi-word phrases more prominently
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: 'Excellent real-world scenario: developer survey results with plausible
+          technology terms'
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Weight values (12-150) create good visual hierarchy, though the range
+          could be slightly wider for more dramatic size differences
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 1
+        max: 3
+        passed: true
+        comment: Code is mostly linear but could be more streamlined; some redundancy
+          with two HTML generations
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Deterministic data (no random generation)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Highcharts API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png ✓ and plot.html
+    library_features:
+      score: 5
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 5
+        max: 5
+        passed: true
+        comment: Excellent use of Highcharts wordcloud module with archimedean spiral,
+          rotation configuration, custom color palette, and tooltip formatting
+  verdict: APPROVED
diff --git a/plots/wordcloud-basic/metadata/letsplot.yaml b/plots/wordcloud-basic/metadata/letsplot.yaml
index 2742fc9a53..d99db36f79 100644
--- a/plots/wordcloud-basic/metadata/letsplot.yaml
+++ b/plots/wordcloud-basic/metadata/letsplot.yaml
@@ -23,3 +23,162 @@ review:
   - Custom word placement algorithm is complex but necessary since lets-plot lacks
     native word cloud support
   - Elixir text appears slightly small compared to other low-frequency words
+  image_description: 'The word cloud displays 26 programming language names arranged
+    in a spiral pattern on a white background. The title "wordcloud-basic · letsplot
+    · pyplots.ai" appears centered at the top in gray text. Word sizes vary according
+    to frequency, with "Python" being the largest (most prominent, displayed in bold
+    blue #306998), followed by "JavaScript" (large, in yellow #FFD43B), "Java" (cyan),
+    "TypeScript" (yellow), and "HTML" (pink/magenta). Smaller words like "Elixir",
+    "Lua", "Clojure" appear at the edges. The color palette cycles through 8 colors:
+    blue, yellow, green, pink, cyan, orange, purple, and gray. All words use bold
+    font and are clearly legible with no overlapping. The layout is well-balanced
+    with words distributed across the canvas.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All words clearly readable, good font sizes ranging from ~9pt to
+          ~26pt scaled appropriately
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: Collision detection algorithm ensures no overlapping words
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Word sizes well-adapted to frequency, clear visual hierarchy
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Uses colorblind-safe palette with good contrast on white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Spiral placement creates balanced distribution, good canvas utilization
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct word cloud visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Word frequency correctly mapped to size
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows word/frequency data, size encoding, appropriate word count
+          (26 words)
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All words visible and within canvas bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: No legend needed; color is decorative as spec allows
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Uses correct format "wordcloud-basic · letsplot · pyplots.ai"
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows variety of word sizes demonstrating frequency encoding; could
+          include more variation in word lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity is a realistic, recognizable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency values (18-100) are plausible; Python at 100 and JavaScript
+          at 92 reflects real-world trends
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Linear flow: imports → data → placement algorithm → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42)
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves to "plot.png" but also saves plot.html (minor, but correct
+          output present)
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses geom_text with scale_size_identity and scale_color_manual; theme_void
+          is appropriate; however, word clouds are not a native lets-plot feature,
+          so implementation requires custom placement algorithm
+  verdict: APPROVED
diff --git a/plots/wordcloud-basic/metadata/matplotlib.yaml b/plots/wordcloud-basic/metadata/matplotlib.yaml
index 73ea11eda3..10b9b0aeda 100644
--- a/plots/wordcloud-basic/metadata/matplotlib.yaml
+++ b/plots/wordcloud-basic/metadata/matplotlib.yaml
@@ -24,3 +24,160 @@ review:
   - Library Features score limited because WordCloud library does most of the work
     rather than matplotlib-specific features
   - Could benefit from slightly more diverse color usage while remaining accessible
+  image_description: The word cloud displays tech industry skills with a white background
+    and a blue color scheme (Blues colormap). The most prominent words are "Python",
+    "JavaScript", "Data", "Machine Learning", "Cloud", and "Security" - displayed
+    in large, dark blue text. Smaller words like "GraphQL", "Terraform", "Monitoring",
+    "Performance" appear in lighter blue shades and smaller sizes. The title "Tech
+    Skills Survey · wordcloud-basic · matplotlib · pyplots.ai" appears at the top
+    in a clean, readable font. Words are arranged in both horizontal and vertical
+    orientations, filling the canvas nicely with good spacing. No overlapping text
+    is visible.
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All words are clearly readable, title is large and prominent, even
+          smallest words legible
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, all words have proper spacing
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Word sizes appropriately scaled to frequency, some minor improvements
+          possible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blues colormap is colorblind-safe, good contrast on white background
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Words fill the canvas well, balanced distribution
+      - id: VQ-07
+        name: Grid & Legend
+        score: 2
+        max: 2
+        passed: true
+        comment: No grid/legend needed for word cloud, axes turned off appropriately
+    spec_compliance:
+      score: 23
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct word cloud visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Word frequencies correctly determine word sizes
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows word/frequency mapping, proper sizing, color encoding
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All words visible, range from small to large appropriately shown
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Title follows format: "Tech Skills Survey · wordcloud-basic · matplotlib
+          · pyplots.ai"'
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows good range of frequencies, mix of horizontal/vertical, variety
+          of word lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Tech skills survey is a realistic, relatable scenario for word cloud
+          usage
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Frequencies range from 7 to 150, realistic for survey data
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean structure: imports → data → plot → save, no functions or classes'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses random_state=42 for reproducible word placement
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports (matplotlib.pyplot, WordCloud)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with proper dpi and bbox_inches
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses WordCloud library with matplotlib
+        score: 3
+        max: 5
+        passed: true
+        comment: Good use of generate_from_frequencies and colormap, but this is a
+          combination library approach rather than pure matplotlib distinctive features
+  verdict: APPROVED
diff --git a/plots/wordcloud-basic/metadata/plotly.yaml b/plots/wordcloud-basic/metadata/plotly.yaml
index 3cac2bd3cb..ca467ef2f2 100644
--- a/plots/wordcloud-basic/metadata/plotly.yaml
+++ b/plots/wordcloud-basic/metadata/plotly.yaml
@@ -23,3 +23,172 @@ review:
   weaknesses:
   - Could leverage more Plotly-specific features like custom hover templates or annotations
   - Color palette has multiple similar blue shades that could be more distinctive
+  image_description: 'The word cloud displays 28 programming language names on a white
+    background. Words are sized proportionally to their frequency, with "Python",
+    "JavaScript", "SQL", and "HTML" appearing largest. The color palette uses Python-themed
+    colors: various blues (#306998, #4B8BBE, #5A9BD5), yellows (#FFD43B, #FFE873),
+    and grays (#646464, #7F7F7F). Words are distributed across the canvas with larger
+    words in the center-top area and smaller words around the edges. The title "wordcloud-basic
+    · plotly · pyplots.ai" appears centered at the top in blue. All text is clearly
+    legible with no overlapping elements.'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: All text is perfectly readable, font sizes scale well from 28px to
+          90px
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text, manual positioning ensures clear separation
+      - id: VQ-03
+        name: Element Visibility
+        score: 8
+        max: 8
+        passed: true
+        comment: Word sizes appropriately scaled to frequency, all words visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Good contrast on white, blues and yellows distinguishable, though
+          similar blue shades could be slightly more varied
+      - id: VQ-05
+        name: Layout Balance
+        score: 4
+        max: 5
+        passed: true
+        comment: Good distribution across canvas, though slightly unbalanced vertically,
+          with empty space at the very bottom
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A for word cloud, axes hidden correctly
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: true
+        comment: N/A for word cloud, no grid/legend needed
+    spec_compliance:
+      score: 25
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct word cloud visualization
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Word size correctly maps to frequency values
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Shows words with frequency-based sizing as specified
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All words visible within canvas bounds
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: N/A, no legend needed for word cloud
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Correct format: "wordcloud-basic · plotly · pyplots.ai"'
+    data_quality:
+      score: 18
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows 28 words with good frequency range (8-100), demonstrates size
+          variation well, though could include more varied word lengths
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Programming language popularity is a realistic and relatable scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 5
+        passed: true
+        comment: Frequency values are plausible survey percentages, though some rankings
+          could be debated (e.g., SQL vs TypeScript)
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: 'Clean linear flow: imports → data → plot → save'
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Both random.seed(42) and np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used (random, numpy, plotly.graph_objects)
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current Plotly API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: true
+        comment: Saves as plot.png with correct dimensions
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Uses distinctive library features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses go.Scatter with text mode and hover interactivity, but does
+          not leverage more advanced Plotly features like animations or custom hover
+          templates
+  verdict: APPROVED